16. Análise de Dados - Parte 2: Processamento de Dados
Esta seção aborda o processamento dos dados coletados em soluções web desenvolvidas com Astro 5, incluindo armazenamento, transformação e análise.
Armazenamento de Dados
Opções de Armazenamento
1. Bancos de Dados
Supabase (PostgreSQL)
import { createClient } from '@supabase/supabase-js';
const supabaseUrl = import.meta.env.PUBLIC_SUPABASE_URL;const supabaseKey = import.meta.env.SUPABASE_SERVICE_KEY;
export const supabase = createClient(supabaseUrl, supabaseKey);
export async function storeAnalyticsEvent(event) { const { error } = await supabase .from('analytics_events') .insert([event]);
if (error) { console.error('Error storing analytics event:', error); throw error; }}
export async function queryEvents(options = {}) { const { startDate, endDate, eventName, limit = 100, page = 1, } = options;
let query = supabase .from('analytics_events') .select('*');
if (eventName) { query = query.eq('name', eventName); }
if (startDate) { query = query.gte('timestamp', startDate); }
if (endDate) { query = query.lte('timestamp', endDate); }
const { data, error, count } = await query .order('timestamp', { ascending: false }) .range((page - 1) * limit, page * limit - 1) .select('*', { count: 'exact' });
if (error) { console.error('Error querying analytics events:', error); throw error; }
return { data, count };}Schema SQL para PostgreSQL
-- analytics_schema.sqlCREATE TABLE analytics_events ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), name TEXT NOT NULL, data JSONB NOT NULL DEFAULT '{}', timestamp TIMESTAMPTZ NOT NULL, url TEXT, referrer TEXT, session_id TEXT, client_id TEXT, user_agent TEXT, screen_size TEXT, language TEXT, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW());
-- Índices para consultas comunsCREATE INDEX idx_analytics_events_name ON analytics_events(name);CREATE INDEX idx_analytics_events_timestamp ON analytics_events(timestamp);CREATE INDEX idx_analytics_events_client_id ON analytics_events(client_id);CREATE INDEX idx_analytics_events_session_id ON analytics_events(session_id);
-- Visualização para sessõesCREATE VIEW analytics_sessions ASSELECT session_id, MIN(timestamp) AS start_time, MAX(timestamp) AS end_time, MAX(timestamp) - MIN(timestamp) AS duration, COUNT(*) AS event_count, MIN(client_id) AS client_id, MIN(user_agent) AS user_agent, MIN(language) AS language, MIN(screen_size) AS screen_sizeFROM analytics_eventsGROUP BY session_id;
-- Visualização para pageviewsCREATE VIEW analytics_pageviews ASSELECT id, timestamp, session_id, client_id, data->>'path' AS path, data->>'title' AS title, url, referrerFROM analytics_eventsWHERE name = 'pageview';2. Armazenamento de Arquivos
Logs em Arquivos JSON
import fs from 'node:fs/promises';import path from 'node:path';
const LOG_DIR = path.join(process.cwd(), 'logs', 'analytics');
export async function logEvent(event) { try { // Garantir que o diretório de logs existe await fs.mkdir(LOG_DIR, { recursive: true });
// Criar nome de arquivo baseado na data const date = new Date(); const fileName = `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}-${String(date.getDate()).padStart(2, '0')}.json`; const filePath = path.join(LOG_DIR, fileName);
// Verificar se o arquivo já existe let events = []; try { const fileContent = await fs.readFile(filePath, 'utf-8'); events = JSON.parse(fileContent); } catch (error) { // Arquivo não existe ou não é válido, criar novo }
// Adicionar evento ao array events.push({ ...event, logged_at: new Date().toISOString() });
// Salvar arquivo await fs.writeFile(filePath, JSON.stringify(events, null, 2));
return true; } catch (error) { console.error('Error logging analytics event:', error); return false; }}
export async function readLogs(date) { try { const targetDate = date || new Date(); const fileName = `${targetDate.getFullYear()}-${String(targetDate.getMonth() + 1).padStart(2, '0')}-${String(targetDate.getDate()).padStart(2, '0')}.json`; const filePath = path.join(LOG_DIR, fileName);
const fileContent = await fs.readFile(filePath, 'utf-8'); return JSON.parse(fileContent); } catch (error) { console.error('Error reading analytics logs:', error); return []; }}Processamento e Transformação de Dados
ETL (Extract, Transform, Load)
import { readLogs } from '../src/api/analytics-log.js';import { supabase } from '../src/utils/supabase.js';
async function processLogs(date) { try { console.log(`Processing logs for ${date.toISOString().split('T')[0]}...`);
// Extrair logs do arquivo const logs = await readLogs(date); console.log(`Found ${logs.length} events to process.`);
// Transformar eventos const transformedEvents = logs.map(event => { // Exemplo de transformação: enriquecimento de dados return { ...event, processed: true, processed_at: new Date().toISOString(), // Adicionar dados derivados day_of_week: new Date(event.timestamp).getDay(), hour_of_day: new Date(event.timestamp).getHours(), }; });
// Carregar em banco de dados if (transformedEvents.length > 0) { // Inserir em lotes de 100 const batchSize = 100; for (let i = 0; i < transformedEvents.length; i += batchSize) { const batch = transformedEvents.slice(i, i + batchSize);
const { error } = await supabase .from('processed_analytics') .insert(batch);
if (error) { console.error('Error loading batch to database:', error); } else { console.log(`Loaded batch ${i / batchSize + 1} of ${Math.ceil(transformedEvents.length / batchSize)}`); } } }
console.log('Processing completed successfully.'); return true; } catch (error) { console.error('Error processing analytics logs:', error); return false; }}
// Processar logs do dia anteriorconst yesterday = new Date();yesterday.setDate(yesterday.getDate() - 1);processLogs(yesterday);Agregação de Dados
import { supabase } from './supabase.js';
// Agregação de pageviews por diaexport async function getPageviewsByDay(startDate, endDate) { const { data, error } = await supabase .rpc('get_pageviews_by_day', { start_date: startDate, end_date: endDate });
if (error) { console.error('Error getting pageviews by day:', error); throw error; }
return data;}
// Agregação de eventos por tipoexport async function getEventCountsByType(startDate, endDate) { const { data, error } = await supabase .rpc('get_event_counts_by_type', { start_date: startDate, end_date: endDate });
if (error) { console.error('Error getting event counts by type:', error); throw error; }
return data;}
// Cálculo de taxa de conversãoexport async function getConversionRate(startDate, endDate, startEventName, conversionEventName) { const { data, error } = await supabase .rpc('get_conversion_rate', { start_date: startDate, end_date: endDate, start_event: startEventName, conversion_event: conversionEventName });
if (error) { console.error('Error calculating conversion rate:', error); throw error; }
return data[0] || { conversion_rate: 0, start_count: 0, conversion_count: 0 };}Funções SQL para Agregação
-- analytics_functions.sql
-- Função para obter pageviews por diaCREATE OR REPLACE FUNCTION get_pageviews_by_day(start_date DATE, end_date DATE)RETURNS TABLE ( day DATE, pageview_count BIGINT) AS $$BEGIN RETURN QUERY SELECT DATE(timestamp) AS day, COUNT(*) AS pageview_count FROM analytics_events WHERE name = 'pageview' AND timestamp >= start_date AND timestamp <= (end_date + INTERVAL '1 day') GROUP BY DATE(timestamp) ORDER BY day;END;$$ LANGUAGE plpgsql;
-- Função para obter contagem de eventos por tipoCREATE OR REPLACE FUNCTION get_event_counts_by_type(start_date DATE, end_date DATE)RETURNS TABLE ( event_name TEXT, event_count BIGINT) AS $$BEGIN RETURN QUERY SELECT name AS event_name, COUNT(*) AS event_count FROM analytics_events WHERE timestamp >= start_date AND timestamp <= (end_date + INTERVAL '1 day') GROUP BY name ORDER BY event_count DESC;END;$$ LANGUAGE plpgsql;
-- Função para calcular taxa de conversãoCREATE OR REPLACE FUNCTION get_conversion_rate( start_date DATE, end_date DATE, start_event TEXT, conversion_event TEXT)RETURNS TABLE ( start_count BIGINT, conversion_count BIGINT, conversion_rate NUMERIC) AS $$BEGIN RETURN QUERY WITH start_events AS ( SELECT client_id, MIN(timestamp) AS first_timestamp FROM analytics_events WHERE name = start_event AND timestamp >= start_date AND timestamp <= (end_date + INTERVAL '1 day') GROUP BY client_id ), conversion_events AS ( SELECT client_id, MIN(timestamp) AS first_timestamp FROM analytics_events WHERE name = conversion_event AND timestamp >= start_date AND timestamp <= (end_date + INTERVAL '1 day') GROUP BY client_id ), converted_users AS ( SELECT s.client_id FROM start_events s JOIN conversion_events c ON s.client_id = c.client_id AND c.first_timestamp >= s.first_timestamp ) SELECT COUNT(DISTINCT s.client_id) AS start_count, COUNT(DISTINCT cu.client_id) AS conversion_count, CASE WHEN COUNT(DISTINCT s.client_id) = 0 THEN 0 ELSE ROUND((COUNT(DISTINCT cu.client_id)::NUMERIC / COUNT(DISTINCT s.client_id)::NUMERIC) * 100, 2) END AS conversion_rate FROM start_events s LEFT JOIN converted_users cu ON s.client_id = cu.client_id;END;$$ LANGUAGE plpgsql;Análise de Dados
Cálculo de Métricas-Chave
import { supabase } from './supabase.js';
// Usuários ativosexport async function getActiveUsers(period = 'day') { let timeFrame;
switch (period) { case 'day': timeFrame = '1 day'; break; case 'week': timeFrame = '7 days'; break; case 'month': timeFrame = '30 days'; break; default: timeFrame = '1 day'; }
const { data, error } = await supabase .rpc('get_active_users', { time_frame: timeFrame });
if (error) { console.error(`Error getting ${period} active users:`, error); throw error; }
return data[0]?.active_users || 0;}
// Tempo médio na páginaexport async function getAverageTimeOnPage(path = null, startDate, endDate) { const { data, error } = await supabase .rpc('get_average_time_on_page', { page_path: path, start_date: startDate, end_date: endDate });
if (error) { console.error('Error getting average time on page:', error); throw error; }
return data;}
// Taxa de rejeiçãoexport async function getBounceRate(startDate, endDate) { const { data, error } = await supabase .rpc('get_bounce_rate', { start_date: startDate, end_date: endDate });
if (error) { console.error('Error getting bounce rate:', error); throw error; }
return data[0]?.bounce_rate || 0;}Funções SQL para Métricas
-- analytics_metrics.sql
-- Função para obter usuários ativosCREATE OR REPLACE FUNCTION get_active_users(time_frame INTERVAL)RETURNS TABLE ( active_users BIGINT) AS $$BEGIN RETURN QUERY SELECT COUNT(DISTINCT client_id) AS active_users FROM analytics_events WHERE timestamp >= NOW() - time_frame;END;$$ LANGUAGE plpgsql;
-- Função para obter tempo médio na páginaCREATE OR REPLACE FUNCTION get_average_time_on_page( page_path TEXT, start_date DATE, end_date DATE)RETURNS TABLE ( path TEXT, avg_time_seconds NUMERIC, pageview_count BIGINT) AS $$BEGIN RETURN QUERY WITH page_sessions AS ( SELECT session_id, data->>'path' AS path, timestamp AS view_time, LEAD(timestamp) OVER (PARTITION BY session_id ORDER BY timestamp) AS next_event_time FROM analytics_events WHERE timestamp >= start_date AND timestamp <= (end_date + INTERVAL '1 day') AND (page_path IS NULL OR data->>'path' = page_path) ) SELECT path, ROUND(AVG(EXTRACT(EPOCH FROM (next_event_time - view_time)))) AS avg_time_seconds, COUNT(*) AS pageview_count FROM page_sessions WHERE next_event_time IS NOT NULL AND EXTRACT(EPOCH FROM (next_event_time - view_time)) BETWEEN 1 AND 3600 -- Filtrar tempos irrealistas (1s a 1h) GROUP BY path ORDER BY avg_time_seconds DESC;END;$$ LANGUAGE plpgsql;
-- Função para calcular taxa de rejeiçãoCREATE OR REPLACE FUNCTION get_bounce_rate( start_date DATE, end_date DATE)RETURNS TABLE ( total_sessions BIGINT, bounce_sessions BIGINT, bounce_rate NUMERIC) AS $$BEGIN RETURN QUERY WITH session_counts AS ( SELECT session_id, COUNT(*) AS event_count FROM analytics_events WHERE timestamp >= start_date AND timestamp <= (end_date + INTERVAL '1 day') GROUP BY session_id ) SELECT COUNT(*) AS total_sessions, SUM(CASE WHEN event_count = 1 THEN 1 ELSE 0 END) AS bounce_sessions, ROUND((SUM(CASE WHEN event_count = 1 THEN 1 ELSE 0 END)::NUMERIC / COUNT(*)::NUMERIC) * 100, 2) AS bounce_rate FROM session_counts;END;$$ LANGUAGE plpgsql;Integração com Ferramentas de BI
Conexão com Metabase
import { supabase } from '../src/utils/supabase.js';import fetch from 'node-fetch';
const METABASE_URL = process.env.METABASE_URL;const METABASE_USERNAME = process.env.METABASE_USERNAME;const METABASE_PASSWORD = process.env.METABASE_PASSWORD;
async function exportToMetabase() { try { // Autenticar com Metabase const authResponse = await fetch(`${METABASE_URL}/api/session`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ username: METABASE_USERNAME, password: METABASE_PASSWORD }) });
const { id: sessionToken } = await authResponse.json();
// Criar ou atualizar coleção const collectionResponse = await fetch(`${METABASE_URL}/api/collection`, { method: 'POST', headers: { 'Content-Type': 'application/json', 'X-Metabase-Session': sessionToken }, body: JSON.stringify({ name: 'Web Analytics', color: '#509EE3', description: 'Analytics data from our website' }) });
const collection = await collectionResponse.json();
// Criar ou atualizar perguntas (queries) const queries = [ { name: 'Daily Pageviews', description: 'Number of pageviews per day', query: { 'source-table': 'analytics_events', aggregation: [['count']], breakout: [['field', 'timestamp', { 'temporal-unit': 'day' }]], filter: ['=', ['field', 'name'], 'pageview'] } }, { name: 'Top Pages', description: 'Most viewed pages', query: { 'source-table': 'analytics_events', aggregation: [['count']], breakout: [['field', ['field', 'data'], 'path']], filter: ['=', ['field', 'name'], 'pageview'], 'order-by': [['desc', ['aggregation', 0]]], limit: 10 } } ];
for (const queryDef of queries) { await fetch(`${METABASE_URL}/api/card`, { method: 'POST', headers: { 'Content-Type': 'application/json', 'X-Metabase-Session': sessionToken }, body: JSON.stringify({ name: queryDef.name, description: queryDef.description, display: 'line', visualization_settings: {}, dataset_query: { type: 'query', query: queryDef.query, database: 1 // ID do banco de dados no Metabase }, collection_id: collection.id }) }); }
console.log('Successfully exported queries to Metabase'); } catch (error) { console.error('Error exporting to Metabase:', error); }}
exportToMetabase();Continua na Parte 3: Visualização e Privacidade de Dados