Pular para o conteúdo

16. Análise de Dados - Parte 2: Processamento de Dados

Esta seção aborda o processamento dos dados coletados em soluções web desenvolvidas com Astro 5, incluindo armazenamento, transformação e análise.

Armazenamento de Dados

Opções de Armazenamento

1. Bancos de Dados

Supabase (PostgreSQL)

src/utils/supabase.js
import { createClient } from '@supabase/supabase-js';
const supabaseUrl = import.meta.env.PUBLIC_SUPABASE_URL;
const supabaseKey = import.meta.env.SUPABASE_SERVICE_KEY;
export const supabase = createClient(supabaseUrl, supabaseKey);
export async function storeAnalyticsEvent(event) {
const { error } = await supabase
.from('analytics_events')
.insert([event]);
if (error) {
console.error('Error storing analytics event:', error);
throw error;
}
}
export async function queryEvents(options = {}) {
const {
startDate,
endDate,
eventName,
limit = 100,
page = 1,
} = options;
let query = supabase
.from('analytics_events')
.select('*');
if (eventName) {
query = query.eq('name', eventName);
}
if (startDate) {
query = query.gte('timestamp', startDate);
}
if (endDate) {
query = query.lte('timestamp', endDate);
}
const { data, error, count } = await query
.order('timestamp', { ascending: false })
.range((page - 1) * limit, page * limit - 1)
.select('*', { count: 'exact' });
if (error) {
console.error('Error querying analytics events:', error);
throw error;
}
return { data, count };
}

Schema SQL para PostgreSQL

-- analytics_schema.sql
CREATE TABLE analytics_events (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name TEXT NOT NULL,
data JSONB NOT NULL DEFAULT '{}',
timestamp TIMESTAMPTZ NOT NULL,
url TEXT,
referrer TEXT,
session_id TEXT,
client_id TEXT,
user_agent TEXT,
screen_size TEXT,
language TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Índices para consultas comuns
CREATE INDEX idx_analytics_events_name ON analytics_events(name);
CREATE INDEX idx_analytics_events_timestamp ON analytics_events(timestamp);
CREATE INDEX idx_analytics_events_client_id ON analytics_events(client_id);
CREATE INDEX idx_analytics_events_session_id ON analytics_events(session_id);
-- Visualização para sessões
CREATE VIEW analytics_sessions AS
SELECT
session_id,
MIN(timestamp) AS start_time,
MAX(timestamp) AS end_time,
MAX(timestamp) - MIN(timestamp) AS duration,
COUNT(*) AS event_count,
MIN(client_id) AS client_id,
MIN(user_agent) AS user_agent,
MIN(language) AS language,
MIN(screen_size) AS screen_size
FROM analytics_events
GROUP BY session_id;
-- Visualização para pageviews
CREATE VIEW analytics_pageviews AS
SELECT
id,
timestamp,
session_id,
client_id,
data->>'path' AS path,
data->>'title' AS title,
url,
referrer
FROM analytics_events
WHERE name = 'pageview';

2. Armazenamento de Arquivos

Logs em Arquivos JSON

src/api/analytics-log.js
import fs from 'node:fs/promises';
import path from 'node:path';
const LOG_DIR = path.join(process.cwd(), 'logs', 'analytics');
export async function logEvent(event) {
try {
// Garantir que o diretório de logs existe
await fs.mkdir(LOG_DIR, { recursive: true });
// Criar nome de arquivo baseado na data
const date = new Date();
const fileName = `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}-${String(date.getDate()).padStart(2, '0')}.json`;
const filePath = path.join(LOG_DIR, fileName);
// Verificar se o arquivo já existe
let events = [];
try {
const fileContent = await fs.readFile(filePath, 'utf-8');
events = JSON.parse(fileContent);
} catch (error) {
// Arquivo não existe ou não é válido, criar novo
}
// Adicionar evento ao array
events.push({
...event,
logged_at: new Date().toISOString()
});
// Salvar arquivo
await fs.writeFile(filePath, JSON.stringify(events, null, 2));
return true;
} catch (error) {
console.error('Error logging analytics event:', error);
return false;
}
}
export async function readLogs(date) {
try {
const targetDate = date || new Date();
const fileName = `${targetDate.getFullYear()}-${String(targetDate.getMonth() + 1).padStart(2, '0')}-${String(targetDate.getDate()).padStart(2, '0')}.json`;
const filePath = path.join(LOG_DIR, fileName);
const fileContent = await fs.readFile(filePath, 'utf-8');
return JSON.parse(fileContent);
} catch (error) {
console.error('Error reading analytics logs:', error);
return [];
}
}

Processamento e Transformação de Dados

ETL (Extract, Transform, Load)

scripts/process-analytics.js
import { readLogs } from '../src/api/analytics-log.js';
import { supabase } from '../src/utils/supabase.js';
async function processLogs(date) {
try {
console.log(`Processing logs for ${date.toISOString().split('T')[0]}...`);
// Extrair logs do arquivo
const logs = await readLogs(date);
console.log(`Found ${logs.length} events to process.`);
// Transformar eventos
const transformedEvents = logs.map(event => {
// Exemplo de transformação: enriquecimento de dados
return {
...event,
processed: true,
processed_at: new Date().toISOString(),
// Adicionar dados derivados
day_of_week: new Date(event.timestamp).getDay(),
hour_of_day: new Date(event.timestamp).getHours(),
};
});
// Carregar em banco de dados
if (transformedEvents.length > 0) {
// Inserir em lotes de 100
const batchSize = 100;
for (let i = 0; i < transformedEvents.length; i += batchSize) {
const batch = transformedEvents.slice(i, i + batchSize);
const { error } = await supabase
.from('processed_analytics')
.insert(batch);
if (error) {
console.error('Error loading batch to database:', error);
} else {
console.log(`Loaded batch ${i / batchSize + 1} of ${Math.ceil(transformedEvents.length / batchSize)}`);
}
}
}
console.log('Processing completed successfully.');
return true;
} catch (error) {
console.error('Error processing analytics logs:', error);
return false;
}
}
// Processar logs do dia anterior
const yesterday = new Date();
yesterday.setDate(yesterday.getDate() - 1);
processLogs(yesterday);

Agregação de Dados

src/utils/analytics-aggregation.js
import { supabase } from './supabase.js';
// Agregação de pageviews por dia
export async function getPageviewsByDay(startDate, endDate) {
const { data, error } = await supabase
.rpc('get_pageviews_by_day', {
start_date: startDate,
end_date: endDate
});
if (error) {
console.error('Error getting pageviews by day:', error);
throw error;
}
return data;
}
// Agregação de eventos por tipo
export async function getEventCountsByType(startDate, endDate) {
const { data, error } = await supabase
.rpc('get_event_counts_by_type', {
start_date: startDate,
end_date: endDate
});
if (error) {
console.error('Error getting event counts by type:', error);
throw error;
}
return data;
}
// Cálculo de taxa de conversão
export async function getConversionRate(startDate, endDate, startEventName, conversionEventName) {
const { data, error } = await supabase
.rpc('get_conversion_rate', {
start_date: startDate,
end_date: endDate,
start_event: startEventName,
conversion_event: conversionEventName
});
if (error) {
console.error('Error calculating conversion rate:', error);
throw error;
}
return data[0] || { conversion_rate: 0, start_count: 0, conversion_count: 0 };
}

Funções SQL para Agregação

-- analytics_functions.sql
-- Função para obter pageviews por dia
CREATE OR REPLACE FUNCTION get_pageviews_by_day(start_date DATE, end_date DATE)
RETURNS TABLE (
day DATE,
pageview_count BIGINT
) AS $$
BEGIN
RETURN QUERY
SELECT
DATE(timestamp) AS day,
COUNT(*) AS pageview_count
FROM analytics_events
WHERE
name = 'pageview' AND
timestamp >= start_date AND
timestamp <= (end_date + INTERVAL '1 day')
GROUP BY DATE(timestamp)
ORDER BY day;
END;
$$ LANGUAGE plpgsql;
-- Função para obter contagem de eventos por tipo
CREATE OR REPLACE FUNCTION get_event_counts_by_type(start_date DATE, end_date DATE)
RETURNS TABLE (
event_name TEXT,
event_count BIGINT
) AS $$
BEGIN
RETURN QUERY
SELECT
name AS event_name,
COUNT(*) AS event_count
FROM analytics_events
WHERE
timestamp >= start_date AND
timestamp <= (end_date + INTERVAL '1 day')
GROUP BY name
ORDER BY event_count DESC;
END;
$$ LANGUAGE plpgsql;
-- Função para calcular taxa de conversão
CREATE OR REPLACE FUNCTION get_conversion_rate(
start_date DATE,
end_date DATE,
start_event TEXT,
conversion_event TEXT
)
RETURNS TABLE (
start_count BIGINT,
conversion_count BIGINT,
conversion_rate NUMERIC
) AS $$
BEGIN
RETURN QUERY
WITH start_events AS (
SELECT
client_id,
MIN(timestamp) AS first_timestamp
FROM analytics_events
WHERE
name = start_event AND
timestamp >= start_date AND
timestamp <= (end_date + INTERVAL '1 day')
GROUP BY client_id
),
conversion_events AS (
SELECT
client_id,
MIN(timestamp) AS first_timestamp
FROM analytics_events
WHERE
name = conversion_event AND
timestamp >= start_date AND
timestamp <= (end_date + INTERVAL '1 day')
GROUP BY client_id
),
converted_users AS (
SELECT
s.client_id
FROM start_events s
JOIN conversion_events c ON s.client_id = c.client_id AND c.first_timestamp >= s.first_timestamp
)
SELECT
COUNT(DISTINCT s.client_id) AS start_count,
COUNT(DISTINCT cu.client_id) AS conversion_count,
CASE
WHEN COUNT(DISTINCT s.client_id) = 0 THEN 0
ELSE ROUND((COUNT(DISTINCT cu.client_id)::NUMERIC / COUNT(DISTINCT s.client_id)::NUMERIC) * 100, 2)
END AS conversion_rate
FROM start_events s
LEFT JOIN converted_users cu ON s.client_id = cu.client_id;
END;
$$ LANGUAGE plpgsql;

Análise de Dados

Cálculo de Métricas-Chave

src/utils/analytics-metrics.js
import { supabase } from './supabase.js';
// Usuários ativos
export async function getActiveUsers(period = 'day') {
let timeFrame;
switch (period) {
case 'day':
timeFrame = '1 day';
break;
case 'week':
timeFrame = '7 days';
break;
case 'month':
timeFrame = '30 days';
break;
default:
timeFrame = '1 day';
}
const { data, error } = await supabase
.rpc('get_active_users', {
time_frame: timeFrame
});
if (error) {
console.error(`Error getting ${period} active users:`, error);
throw error;
}
return data[0]?.active_users || 0;
}
// Tempo médio na página
export async function getAverageTimeOnPage(path = null, startDate, endDate) {
const { data, error } = await supabase
.rpc('get_average_time_on_page', {
page_path: path,
start_date: startDate,
end_date: endDate
});
if (error) {
console.error('Error getting average time on page:', error);
throw error;
}
return data;
}
// Taxa de rejeição
export async function getBounceRate(startDate, endDate) {
const { data, error } = await supabase
.rpc('get_bounce_rate', {
start_date: startDate,
end_date: endDate
});
if (error) {
console.error('Error getting bounce rate:', error);
throw error;
}
return data[0]?.bounce_rate || 0;
}

Funções SQL para Métricas

-- analytics_metrics.sql
-- Função para obter usuários ativos
CREATE OR REPLACE FUNCTION get_active_users(time_frame INTERVAL)
RETURNS TABLE (
active_users BIGINT
) AS $$
BEGIN
RETURN QUERY
SELECT
COUNT(DISTINCT client_id) AS active_users
FROM analytics_events
WHERE timestamp >= NOW() - time_frame;
END;
$$ LANGUAGE plpgsql;
-- Função para obter tempo médio na página
CREATE OR REPLACE FUNCTION get_average_time_on_page(
page_path TEXT,
start_date DATE,
end_date DATE
)
RETURNS TABLE (
path TEXT,
avg_time_seconds NUMERIC,
pageview_count BIGINT
) AS $$
BEGIN
RETURN QUERY
WITH page_sessions AS (
SELECT
session_id,
data->>'path' AS path,
timestamp AS view_time,
LEAD(timestamp) OVER (PARTITION BY session_id ORDER BY timestamp) AS next_event_time
FROM analytics_events
WHERE
timestamp >= start_date AND
timestamp <= (end_date + INTERVAL '1 day') AND
(page_path IS NULL OR data->>'path' = page_path)
)
SELECT
path,
ROUND(AVG(EXTRACT(EPOCH FROM (next_event_time - view_time)))) AS avg_time_seconds,
COUNT(*) AS pageview_count
FROM page_sessions
WHERE
next_event_time IS NOT NULL AND
EXTRACT(EPOCH FROM (next_event_time - view_time)) BETWEEN 1 AND 3600 -- Filtrar tempos irrealistas (1s a 1h)
GROUP BY path
ORDER BY avg_time_seconds DESC;
END;
$$ LANGUAGE plpgsql;
-- Função para calcular taxa de rejeição
CREATE OR REPLACE FUNCTION get_bounce_rate(
start_date DATE,
end_date DATE
)
RETURNS TABLE (
total_sessions BIGINT,
bounce_sessions BIGINT,
bounce_rate NUMERIC
) AS $$
BEGIN
RETURN QUERY
WITH session_counts AS (
SELECT
session_id,
COUNT(*) AS event_count
FROM analytics_events
WHERE
timestamp >= start_date AND
timestamp <= (end_date + INTERVAL '1 day')
GROUP BY session_id
)
SELECT
COUNT(*) AS total_sessions,
SUM(CASE WHEN event_count = 1 THEN 1 ELSE 0 END) AS bounce_sessions,
ROUND((SUM(CASE WHEN event_count = 1 THEN 1 ELSE 0 END)::NUMERIC / COUNT(*)::NUMERIC) * 100, 2) AS bounce_rate
FROM session_counts;
END;
$$ LANGUAGE plpgsql;

Integração com Ferramentas de BI

Conexão com Metabase

scripts/export-to-metabase.js
import { supabase } from '../src/utils/supabase.js';
import fetch from 'node-fetch';
const METABASE_URL = process.env.METABASE_URL;
const METABASE_USERNAME = process.env.METABASE_USERNAME;
const METABASE_PASSWORD = process.env.METABASE_PASSWORD;
async function exportToMetabase() {
try {
// Autenticar com Metabase
const authResponse = await fetch(`${METABASE_URL}/api/session`, {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify({
username: METABASE_USERNAME,
password: METABASE_PASSWORD
})
});
const { id: sessionToken } = await authResponse.json();
// Criar ou atualizar coleção
const collectionResponse = await fetch(`${METABASE_URL}/api/collection`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'X-Metabase-Session': sessionToken
},
body: JSON.stringify({
name: 'Web Analytics',
color: '#509EE3',
description: 'Analytics data from our website'
})
});
const collection = await collectionResponse.json();
// Criar ou atualizar perguntas (queries)
const queries = [
{
name: 'Daily Pageviews',
description: 'Number of pageviews per day',
query: {
'source-table': 'analytics_events',
aggregation: [['count']],
breakout: [['field', 'timestamp', { 'temporal-unit': 'day' }]],
filter: ['=', ['field', 'name'], 'pageview']
}
},
{
name: 'Top Pages',
description: 'Most viewed pages',
query: {
'source-table': 'analytics_events',
aggregation: [['count']],
breakout: [['field', ['field', 'data'], 'path']],
filter: ['=', ['field', 'name'], 'pageview'],
'order-by': [['desc', ['aggregation', 0]]],
limit: 10
}
}
];
for (const queryDef of queries) {
await fetch(`${METABASE_URL}/api/card`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'X-Metabase-Session': sessionToken
},
body: JSON.stringify({
name: queryDef.name,
description: queryDef.description,
display: 'line',
visualization_settings: {},
dataset_query: {
type: 'query',
query: queryDef.query,
database: 1 // ID do banco de dados no Metabase
},
collection_id: collection.id
})
});
}
console.log('Successfully exported queries to Metabase');
} catch (error) {
console.error('Error exporting to Metabase:', error);
}
}
exportToMetabase();

Continua na Parte 3: Visualização e Privacidade de Dados