Skip to content

data-analyst.md

Data Analyst Agent

An agent that queries databases, performs calculations, and generates insights.

Overview

This example demonstrates:

  • Database query tools
  • Statistical aggregation
  • Comparative analysis
  • Trend detection

Full Code

typescript
import { Agent, OpenAIProvider, defineTool } from 'agentforge';
import { z } from 'zod';

// ============================================
// Mock Database
// ============================================

interface SalesRecord {
  id: string;
  date: string;
  product: string;
  category: string;
  quantity: number;
  revenue: number;
  region: string;
}

const salesData: SalesRecord[] = [
  { id: '1', date: '2024-01-15', product: 'Widget A', category: 'Widgets', quantity: 150, revenue: 4500, region: 'North' },
  { id: '2', date: '2024-01-15', product: 'Gadget B', category: 'Gadgets', quantity: 75, revenue: 3750, region: 'South' },
  { id: '3', date: '2024-01-16', product: 'Widget A', category: 'Widgets', quantity: 200, revenue: 6000, region: 'East' },
  { id: '4', date: '2024-01-16', product: 'Widget C', category: 'Widgets', quantity: 100, revenue: 5000, region: 'West' },
  { id: '5', date: '2024-01-17', product: 'Gadget B', category: 'Gadgets', quantity: 120, revenue: 6000, region: 'North' },
  // ... more records
];

// ============================================
// Tools
// ============================================

const querySalesTool = defineTool({
  name: 'query_sales',
  description: 'Query the sales database with optional filters',
  parameters: z.object({
    startDate: z.string().optional().describe('Start date (YYYY-MM-DD)'),
    endDate: z.string().optional().describe('End date (YYYY-MM-DD)'),
    category: z.string().optional().describe('Product category'),
    region: z.string().optional().describe('Sales region'),
    limit: z.number().default(10).describe('Max records'),
  }),
  execute: async ({ startDate, endDate, category, region, limit }) => {
    let results = [...salesData];

    if (startDate) results = results.filter((r) => r.date >= startDate);
    if (endDate) results = results.filter((r) => r.date <= endDate);
    if (category) results = results.filter((r) => 
      r.category.toLowerCase() === category.toLowerCase()
    );
    if (region) results = results.filter((r) => 
      r.region.toLowerCase() === region.toLowerCase()
    );

    return { totalRecords: results.length, records: results.slice(0, limit) };
  },
});

const calculateStatsTool = defineTool({
  name: 'calculate_stats',
  description: 'Calculate aggregate statistics on sales data',
  parameters: z.object({
    metric: z.enum(['revenue', 'quantity']).describe('Metric to calculate'),
    operation: z.enum(['sum', 'average', 'min', 'max', 'count']),
    groupBy: z.enum(['category', 'region', 'product', 'date']).optional(),
    filters: z.object({
      category: z.string().optional(),
      region: z.string().optional(),
    }).optional(),
  }),
  execute: async ({ metric, operation, groupBy, filters }) => {
    let data = [...salesData];

    // Apply filters
    if (filters?.category) {
      data = data.filter((r) => 
        r.category.toLowerCase() === filters.category!.toLowerCase()
      );
    }
    if (filters?.region) {
      data = data.filter((r) => 
        r.region.toLowerCase() === filters.region!.toLowerCase()
      );
    }

    const calculate = (values: number[]): number => {
      switch (operation) {
        case 'sum': return values.reduce((a, b) => a + b, 0);
        case 'average': return values.reduce((a, b) => a + b, 0) / values.length;
        case 'min': return Math.min(...values);
        case 'max': return Math.max(...values);
        case 'count': return values.length;
      }
    };

    if (groupBy) {
      const groups = new Map<string, number[]>();
      
      for (const record of data) {
        const key = record[groupBy as keyof SalesRecord] as string;
        if (!groups.has(key)) groups.set(key, []);
        groups.get(key)!.push(record[metric]);
      }

      const results: Record<string, number> = {};
      for (const [key, values] of groups) {
        results[key] = calculate(values);
      }

      return { metric, operation, groupBy, results };
    }

    return {
      metric,
      operation,
      result: calculate(data.map((r) => r[metric])),
      recordCount: data.length,
    };
  },
});

const compareTool = defineTool({
  name: 'compare_metrics',
  description: 'Compare metrics between two categories or regions',
  parameters: z.object({
    metric: z.enum(['revenue', 'quantity']),
    compareBy: z.enum(['category', 'region']),
    value1: z.string().describe('First value to compare'),
    value2: z.string().describe('Second value to compare'),
  }),
  execute: async ({ metric, compareBy, value1, value2 }) => {
    const getData = (filterValue: string) => {
      return salesData
        .filter((r) => {
          const field = r[compareBy as keyof SalesRecord] as string;
          return field.toLowerCase() === filterValue.toLowerCase();
        })
        .reduce((sum, r) => sum + r[metric], 0);
    };

    const total1 = getData(value1);
    const total2 = getData(value2);
    const difference = total1 - total2;
    const percentDiff = total2 !== 0 
      ? ((difference / total2) * 100).toFixed(2) 
      : 'N/A';

    return {
      comparison: { [value1]: total1, [value2]: total2 },
      difference,
      percentageDifference: percentDiff + '%',
      winner: total1 > total2 ? value1 : total2 > total1 ? value2 : 'tie',
    };
  },
});

const trendAnalysisTool = defineTool({
  name: 'analyze_trend',
  description: 'Analyze trends over time for a metric',
  parameters: z.object({
    metric: z.enum(['revenue', 'quantity']),
    category: z.string().optional().describe('Filter by category'),
  }),
  execute: async ({ metric, category }) => {
    let data = [...salesData];
    
    if (category) {
      data = data.filter((r) => 
        r.category.toLowerCase() === category.toLowerCase()
      );
    }

    // Group by date
    const byDate = new Map<string, number>();
    for (const record of data) {
      const current = byDate.get(record.date) || 0;
      byDate.set(record.date, current + record[metric]);
    }

    const sorted = Array.from(byDate.entries())
      .sort((a, b) => a[0].localeCompare(b[0]));
    
    const values = sorted.map(([, v]) => v);
    const firstHalf = values.slice(0, Math.floor(values.length / 2));
    const secondHalf = values.slice(Math.floor(values.length / 2));
    
    const avgFirst = firstHalf.reduce((a, b) => a + b, 0) / firstHalf.length;
    const avgSecond = secondHalf.reduce((a, b) => a + b, 0) / secondHalf.length;
    
    const trend = avgSecond > avgFirst ? 'increasing' 
      : avgSecond < avgFirst ? 'decreasing' 
      : 'stable';

    return {
      metric,
      category: category || 'all',
      dataPoints: sorted.map(([date, value]) => ({ date, value })),
      trend,
      changePercent: ((avgSecond - avgFirst) / avgFirst * 100).toFixed(2) + '%',
    };
  },
});

// ============================================
// Agent
// ============================================

const agent = new Agent({
  provider: new OpenAIProvider({ apiKey: process.env.OPENAI_API_KEY! }),
  tools: [querySalesTool, calculateStatsTool, compareTool, trendAnalysisTool],
  systemPrompt: `You are a data analyst assistant. Help users understand their sales data by:
- Querying the database for specific information
- Calculating statistics and aggregates
- Comparing metrics across dimensions
- Identifying trends and patterns

Always explain your findings clearly and suggest follow-up analyses.`,
});

// ============================================
// Usage
// ============================================

async function analyze(question: string) {
  console.log(`Question: ${question}\n`);
  const response = await agent.run(question);
  console.log(`Analysis: ${response.content}\n`);
}

await analyze('What was our total revenue last week?');
await analyze('Compare revenue between North and South regions');
await analyze('Show me the revenue trend for Gadgets');

Key Patterns

1. Flexible Filtering

typescript
parameters: z.object({
  startDate: z.string().optional(),
  endDate: z.string().optional(),
  category: z.string().optional(),
  region: z.string().optional(),
}),

Optional parameters let the LLM build targeted queries.

2. Grouping and Aggregation

typescript
if (groupBy) {
  const groups = new Map<string, number[]>();
  // Group values by dimension
  // Calculate aggregate for each group
}

3. Comparative Analysis

typescript
return {
  comparison: { [value1]: total1, [value2]: total2 },
  difference,
  percentageDifference,
  winner,
};

Structured output helps the LLM explain results clearly.

Try It

bash
export OPENAI_API_KEY=your-key
npx ts-node examples/data-analyst/index.ts

Released under the MIT License.