Learn how to maintain a clean and organized knowledge base by managing content lifecycle, removing outdated information, and keeping your data fresh.

What You’ll Learn

This guide covers:

  • Listing and filtering content
  • Identifying outdated or duplicate content
  • Bulk deletion strategies
  • Best practices for content maintenance

Why Content Cleanup Matters

Regular cleanup helps:

  • Improve search accuracy: Remove outdated information
  • Reduce storage costs: Delete unnecessary content
  • Maintain quality: Keep only relevant, current content
  • Enhance performance: Smaller datasets are faster to search

Listing and Reviewing Content

// Base URL that every request in this guide is built from.
const API_URL = 'https://sdk.senso.ai/api/v1';
// Placeholder: replace with your own key. It is sent in the X-API-Key header of each request.
const API_KEY = 'YOUR_API_KEY';

/**
 * Fetches every content item (100 per page) and summarizes the result.
 *
 * Logs the total item count, a tally per processing_status, the number of
 * items created more than 90 days ago, and the number of items whose
 * processing_status is 'failed'.
 *
 * @returns {Promise<{allContent: Array, oldContent: Array, failedContent: Array}|undefined>}
 *   The three lists, or undefined when anything fails (the error is logged
 *   with console.error and not rethrown).
 */
async function reviewContent() {
  try {
    // List all content with pagination
    const allContent = [];
    const limit = 100;

    for (let offset = 0; ; offset += limit) {
      const response = await fetch(`${API_URL}/content?limit=${limit}&offset=${offset}`, {
        headers: { 'X-API-Key': API_KEY }
      });
      // fetch() only rejects on network failure; HTTP error statuses must be
      // checked explicitly or the error body is mistaken for a page of items.
      if (!response.ok) {
        throw new Error(`Listing content failed with HTTP ${response.status}`);
      }

      const data = await response.json();
      if (!data || !Array.isArray(data.items)) {
        throw new Error('Listing content returned a response without an items array');
      }
      allContent.push(...data.items);

      // A short page means there is nothing left to fetch.
      if (data.items.length < limit) break;
    }

    console.log(`Total content items: ${allContent.length}`);

    // Analyze content by status
    const byStatus = allContent.reduce((acc, item) => {
      acc[item.processing_status] = (acc[item.processing_status] || 0) + 1;
      return acc;
    }, {});

    console.log('Content by status:', byStatus);

    // Find old content (over 90 days)
    const ninetyDaysAgo = new Date();
    ninetyDaysAgo.setDate(ninetyDaysAgo.getDate() - 90);

    const oldContent = allContent.filter(item =>
      new Date(item.created_at) < ninetyDaysAgo
    );

    console.log(`Content older than 90 days: ${oldContent.length}`);

    // Identify content with processing errors
    const failedContent = allContent.filter(item =>
      item.processing_status === 'failed'
    );

    console.log(`Failed content items: ${failedContent.length}`);

    return { allContent, oldContent, failedContent };

  } catch (error) {
    // Best-effort by design: report the problem and resolve to undefined.
    console.error('Error reviewing content:', error);
  }
}

// Run the review once. reviewContent() catches and logs its own errors, so the promise is left un-awaited here.
reviewContent();

Cleanup Strategies

1. Remove Failed Content

/**
 * Deletes every content item that reviewContent() reports as failed.
 *
 * Items are deleted one at a time; a failure on one item is logged and does
 * not stop the remaining deletions.
 *
 * @returns {Promise<void>}
 */
async function cleanupFailedContent() {
  const review = await reviewContent();
  // reviewContent() resolves to undefined when its listing fails; there is
  // nothing safe to delete in that case.
  if (!review) {
    console.error('Skipping failed-content cleanup: content review did not complete');
    return;
  }
  const { failedContent } = review;

  console.log(`Removing ${failedContent.length} failed items...`);

  for (const item of failedContent) {
    try {
      const response = await fetch(`${API_URL}/content/${item.id}`, {
        method: 'DELETE',
        headers: { 'X-API-Key': API_KEY }
      });
      // fetch() resolves on HTTP errors too, so only report success when the
      // server actually accepted the delete.
      if (!response.ok) {
        throw new Error(`DELETE returned HTTP ${response.status}`);
      }
      console.log(`Deleted failed content: ${item.title}`);
    } catch (error) {
      console.error(`Failed to delete ${item.id}:`, error);
    }
  }
}

2. Archive Old Content

Instead of deleting, you might want to archive old content:

/**
 * Moves old raw content into an "Archive" category and prefixes its title
 * with "[ARCHIVED] ".
 *
 * Only items with type 'raw' are updated. Items whose title already carries
 * the prefix are skipped, so repeated runs do not stack the prefix. The final
 * log line reports how many items were actually updated.
 *
 * @returns {Promise<void>}
 */
async function archiveOldContent() {
  const ARCHIVE_PREFIX = '[ARCHIVED] ';

  // Create an archive category
  // NOTE(review): this POST is issued on every call; confirm how the API
  // treats a category name that already exists.
  const categoryResponse = await fetch(`${API_URL}/categories`, {
    method: 'POST',
    headers: {
      'X-API-Key': API_KEY,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      name: 'Archive',
      description: 'Archived content for historical reference'
    })
  });
  // Without a category there is nowhere to move content to.
  if (!categoryResponse.ok) {
    console.error(`Creating archive category failed with HTTP ${categoryResponse.status}`);
    return;
  }
  const archiveCategory = await categoryResponse.json();

  const review = await reviewContent();
  // reviewContent() resolves to undefined when its listing fails.
  if (!review) {
    console.error('Skipping archive: content review did not complete');
    return;
  }
  const { oldContent } = review;

  // Move old content to archive (update with category)
  let archivedCount = 0;
  for (const item of oldContent) {
    if (item.type !== 'raw') continue;
    // Already archived by an earlier run; it is still "old", so it shows up again.
    if (typeof item.title === 'string' && item.title.startsWith(ARCHIVE_PREFIX)) continue;

    const response = await fetch(`${API_URL}/content/raw/${item.id}`, {
      method: 'PUT',
      headers: {
        'X-API-Key': API_KEY,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        title: `${ARCHIVE_PREFIX}${item.title}`,
        category_id: archiveCategory.category_id,
        // NOTE(review): if the listing does not include `text`, this sends the
        // placeholder as the item's text; confirm against the API.
        text: item.text || 'Archived content'
      })
    });

    if (response.ok) {
      archivedCount += 1;
    } else {
      console.error(`Failed to archive ${item.id}: HTTP ${response.status}`);
    }
  }

  console.log(`Archived ${archivedCount} items`);
}

3. Deduplicate Content

Find and remove duplicate content:

/**
 * Finds items that share a title (case-insensitive, surrounding whitespace
 * ignored), keeps the most recently created item of each group, and deletes
 * the rest.
 *
 * Items without a string title are ignored because they cannot be matched.
 *
 * @returns {Promise<void>}
 */
async function findDuplicates() {
  const review = await reviewContent();
  // reviewContent() resolves to undefined when its listing fails.
  if (!review) {
    console.error('Skipping deduplication: content review did not complete');
    return;
  }
  const { allContent } = review;

  // Group by title to find potential duplicates.
  // A Map is used instead of a plain object so titles such as "constructor"
  // cannot collide with properties inherited from Object.prototype.
  const byTitle = new Map();
  for (const item of allContent) {
    if (typeof item.title !== 'string') continue;
    const key = item.title.toLowerCase().trim();
    const group = byTitle.get(key);
    if (group) {
      group.push(item);
    } else {
      byTitle.set(key, [item]);
    }
  }

  // Find duplicates
  const duplicates = Array.from(byTitle)
    .filter(([, items]) => items.length > 1)
    .map(([title, items]) => ({
      title,
      // Newest first, so the first element is the one to keep.
      items: items.sort((a, b) =>
        new Date(b.created_at) - new Date(a.created_at)
      )
    }));

  console.log(`Found ${duplicates.length} duplicate groups`);

  // Keep newest, delete older versions
  for (const group of duplicates) {
    const [keep, ...remove] = group.items;
    console.log(`Keeping: ${keep.title} (${keep.created_at})`);

    for (const item of remove) {
      const response = await fetch(`${API_URL}/content/${item.id}`, {
        method: 'DELETE',
        headers: { 'X-API-Key': API_KEY }
      });
      // fetch() resolves on HTTP errors too; only claim success on a 2xx.
      if (response.ok) {
        console.log(`  Deleted older version: ${item.created_at}`);
      } else {
        console.error(`  Failed to delete ${item.id}: HTTP ${response.status}`);
      }
    }
  }
}

Automated Cleanup Schedule

Create a scheduled cleanup job:

/**
 * Runs one full maintenance pass: the three cleanup steps in order, then a
 * report that is logged when everything has finished.
 */
async function scheduledCleanup() {
  console.log('Starting scheduled cleanup...');

  // Each step waits for the previous one to finish:
  //   1. remove failed content
  //   2. archive old content (reviewContent() treats items older than 90 days as old)
  //   3. remove duplicates
  const steps = [
    () => cleanupFailedContent(),
    () => archiveOldContent(),
    () => findDuplicates(),
  ];
  for (const runStep of steps) {
    await runStep();
  }

  // 4. Build the report and log it with the completion message
  const summary = await generateCleanupReport();
  console.log('Cleanup complete!', summary);
}

// Run weekly. The callback handles a rejected run itself so the failure is
// logged instead of surfacing as an unhandled promise rejection.
setInterval(() => {
  scheduledCleanup().catch((error) => {
    console.error('Scheduled cleanup failed:', error);
  });
}, 7 * 24 * 60 * 60 * 1000);

Best Practices

  1. Always back up first: Export important content before bulk deletions
  2. Use soft deletes: Archive instead of deleting when possible
  3. Review before deleting: Manually verify bulk deletions
  4. Keep audit logs: Track what was deleted and when
  5. Set retention policies: Define how long to keep different content types

Cleanup Metrics

Track the health of your knowledge base:

/**
 * Builds a summary of the knowledge base from reviewContent().
 *
 * @returns {Promise<{total_items: number, by_type: Object, by_status: Object,
 *   average_age_days: number, storage_estimate_mb: number}>}
 *   Item counts overall, per type and per processing_status, plus the average
 *   item age in whole days (0 when no item has a usable created_at).
 *   storage_estimate_mb is not calculated here and is always 0.
 * @throws {Error} When reviewContent() did not return a result.
 */
async function generateCleanupReport() {
  const review = await reviewContent();
  // reviewContent() resolves to undefined when its listing fails; a report of
  // zeros would be misleading, so fail with a clear message instead.
  if (!review) {
    throw new Error('Cannot generate cleanup report: content review did not complete');
  }
  const { allContent } = review;

  const report = {
    total_items: allContent.length,
    by_type: {},
    by_status: {},
    average_age_days: 0,
    storage_estimate_mb: 0
  };

  // Calculate metrics
  const now = Date.now();
  let totalAge = 0;
  let datedItems = 0;
  allContent.forEach(item => {
    report.by_type[item.type] = (report.by_type[item.type] || 0) + 1;
    report.by_status[item.processing_status] =
      (report.by_status[item.processing_status] || 0) + 1;

    // Items with a missing or unparseable created_at are left out of the
    // average; otherwise a single NaN would wipe out the whole figure.
    const createdAt = new Date(item.created_at).getTime();
    if (!Number.isNaN(createdAt)) {
      totalAge += now - createdAt;
      datedItems += 1;
    }
  });

  // Average age (guarding the empty case, which would divide by zero)
  if (datedItems > 0) {
    report.average_age_days = Math.round(totalAge / datedItems / 86400000);
  }

  return report;
}

Next Steps