Custom Fact Extractors
This example shows how to create custom fact extractor plugins for specialized evaluation needs.
Overview
Custom fact extractors allow you to:
- Domain-Specific Extraction: Extract facts for your specific domain
- Rule-Based Extraction: Use domain rules and patterns
- Hybrid Approaches: Combine multiple extraction methods
- Optimized Performance: Tailor extraction for your use case
Example 1: Product-Specific Extractor
Create a fact extractor for product-related questions.
Plugin Definition
<?php
namespace Drupal\my_module\Plugin\FactExtractor;
use Drupal\ai_autoevals\Plugin\FactExtractor\FactExtractorPluginBase;
/** * Product fact extractor. * * @FactExtractor( * id = "product_extractor", * label = @Translation("Product Extractor"), * description = @Translation("Extracts product-related facts."), * weight = 10 * ) */class ProductFactExtractor extends FactExtractorPluginBase {
/**
 * {@inheritdoc}
 *
 * Builds evaluation criteria from the product names and topic keywords
 * (specifications, pricing, availability) that appear in the question.
 *
 * @param string $input
 *   The question to derive facts from.
 * @param array $context
 *   Additional context; not used by this extractor.
 *
 * @return string[]
 *   Evaluation criteria, one sentence per fact. Empty when nothing matches.
 */
public function extract(string $input, array $context = []): array {
  $facts = [];

  // Extract product names. The pattern is case-insensitive, so deduplicate
  // case-insensitively too: array_unique() alone would treat "SuperWidget"
  // and "superwidget" as two different products and emit duplicate facts.
  if (preg_match_all('/\b(SuperWidget|TechGadget|ProTool)\b/i', $input, $matches)) {
    $products = [];
    foreach ($matches[0] as $product) {
      $key = strtolower($product);
      if (!isset($products[$key])) {
        // Keep the spelling of the first mention.
        $products[$key] = $product;
      }
    }
    foreach ($products as $product) {
      $facts[] = "The answer should accurately describe $product";
    }
  }

  // Extract specifications.
  if (preg_match('/specification|specs?|feature|capability/i', $input)) {
    $facts[] = "The answer should include product specifications and features";
  }

  // Extract pricing.
  if (preg_match('/price|cost|expensive|cheap|budget/i', $input)) {
    $facts[] = "The answer should include accurate pricing information";
  }

  // Extract availability.
  if (preg_match('/available|in stock|out of stock|ship|delivery/i', $input)) {
    $facts[] = "The answer should address product availability and shipping";
  }

  return $facts;
}
/**
 * {@inheritdoc}
 */
public function isAvailable(): bool {
  // Unconditionally available: this method performs no checks of its own.
  return TRUE;
}
}
Using Custom Knowledge
Enhance the product extractor with custom knowledge:
/**
 * Extends the base extraction with facts tied to the custom knowledge.
 *
 * For each known product that appears both in the question and in the
 * evaluation set's custom knowledge, a consistency requirement is appended
 * to whatever the parent implementation returned.
 */
public function extract(string $input, array $context = [], ?EvaluationSetInterface $evaluationSet = null): array {
  // Start from the base extraction.
  $facts = parent::extract($input, $context, $evaluationSet);

  // Without custom knowledge there is nothing to add.
  if (!$evaluationSet || !$evaluationSet->hasCustomKnowledge()) {
    return $facts;
  }

  $knowledge = $evaluationSet->getCustomKnowledge();
  $knownProducts = ['SuperWidget', 'TechGadget', 'ProTool'];

  foreach ($knownProducts as $product) {
    // Only products mentioned in the question are looked up in the knowledge.
    $mentionedInQuestion = stripos($input, $product) !== FALSE;
    if ($mentionedInQuestion && stripos($knowledge, $product) !== FALSE) {
      $facts[] = "The answer should be consistent with the product knowledge base for $product";
    }
  }

  return $facts;
}
Example 2: Medical Content Extractor
Create a fact extractor for medical content with strict requirements.
<?php
namespace Drupal\medical_ai\Plugin\FactExtractor;
use Drupal\ai_autoevals\Plugin\FactExtractor\FactExtractorPluginBase;
/** * Medical content fact extractor. * * @FactExtractor( * id = "medical_extractor", * label = @Translation("Medical Content Extractor"), * description = @Translation("Extracts medical content facts with safety requirements."), * weight = 20 * ) */class MedicalFactExtractor extends FactExtractorPluginBase {
/**
 * {@inheritdoc}
 *
 * Builds evaluation criteria for medical questions: sourcing requirements,
 * a disclaimer requirement, per-term accuracy and safety warnings.
 *
 * @param string $input
 *   The question to derive facts from.
 * @param array $context
 *   Additional context; not used by this extractor.
 *
 * @return string[]
 *   Evaluation criteria. Never empty, because the disclaimer requirement is
 *   always included.
 */
public function extract(string $input, array $context = []): array {
  $facts = [];

  // Require citations for medical claims.
  if (preg_match('/medical|health|treatment|diagnosis|symptom|drug|medication/i', $input)) {
    $facts[] = "The answer should include citations or references to medical sources";
    $facts[] = "The answer should be accurate and based on medical consensus";
  }

  // A disclaimer is required for every answer, whatever the question says.
  $facts[] = "The answer should include appropriate medical disclaimers";

  // Extract specific medical terms. The pattern is case-insensitive, so
  // deduplicate case-insensitively too: array_unique() alone would emit one
  // fact for "Diabetes" and another for "diabetes".
  if (preg_match_all('/\b(vaccine|antibiotic|diabetes|hypertension|cancer|depression)\b/i', $input, $matches)) {
    $terms = [];
    foreach ($matches[0] as $term) {
      $key = strtolower($term);
      if (!isset($terms[$key])) {
        // Keep the spelling of the first mention.
        $terms[$key] = $term;
      }
    }
    foreach ($terms as $term) {
      $facts[] = "The answer should provide accurate medical information about $term";
    }
  }

  // Check for safety warnings.
  if (preg_match('/side effect|danger|risk|warning|caution/i', $input)) {
    $facts[] = "The answer should include relevant safety warnings and side effects";
  }

  return $facts;
}
/**
 * {@inheritdoc}
 */
public function isAvailable(): bool {
  // Unconditionally available: this method performs no checks of its own.
  return TRUE;
}
}
Example 3: Technical Documentation Extractor
Create an extractor for technical documentation.
<?php
namespace Drupal\technical_docs\Plugin\FactExtractor;
use Drupal\ai_autoevals\Plugin\FactExtractor\FactExtractorPluginBase;
/** * Technical documentation fact extractor. * * @FactExtractor( * id = "technical_docs_extractor", * label = @Translation("Technical Documentation Extractor"), * description = @Translation("Extracts technical documentation facts."), * weight = 15 * ) */class TechnicalDocsFactExtractor extends FactExtractorPluginBase {
/**
 * {@inheritdoc}
 *
 * Builds evaluation criteria for technical questions from the API calls,
 * parameter, code example, version and error-handling keywords they mention.
 *
 * @param string $input
 *   The question to derive facts from.
 * @param array $context
 *   Additional context; not used by this extractor.
 *
 * @return string[]
 *   Evaluation criteria, one sentence per fact. Empty when nothing matches.
 */
public function extract(string $input, array $context = []): array {
  $facts = [];

  // Extract API calls. A question may mention the same call several times
  // (possibly with different casing), so emit one fact per distinct call.
  if (preg_match_all('/\b(get|post|put|delete|patch)\s*\/api/i', $input, $matches)) {
    $calls = [];
    foreach ($matches[0] as $call) {
      $key = strtolower($call);
      if (!isset($calls[$key])) {
        // Keep the spelling of the first mention.
        $calls[$key] = $call;
      }
    }
    foreach ($calls as $call) {
      $facts[] = "The answer should accurately describe the $call endpoint";
    }
  }

  // Extract parameters.
  if (preg_match('/parameter|argument|query string|body|header/i', $input)) {
    $facts[] = "The answer should include all required parameters";
    $facts[] = "The answer should describe parameter types and formats";
  }

  // Extract code examples.
  if (preg_match('/code|example|implementation|sample/i', $input)) {
    $facts[] = "The answer should include working code examples";
    $facts[] = "Code examples should be syntactically correct";
  }

  // Extract version information.
  if (preg_match('/version|v\d+\.\d+|\d+\.\d+\.\d+/i', $input)) {
    $facts[] = "The answer should be specific to the correct version";
  }

  // Extract error handling.
  if (preg_match('/error|exception|fail|status code/i', $input)) {
    $facts[] = "The answer should include error handling information";
  }

  return $facts;
}
/**
 * {@inheritdoc}
 */
public function isAvailable(): bool {
  // Unconditionally available: this method performs no checks of its own.
  return TRUE;
}
}
Example 4: Configurable Extractor
Create an extractor with configurable patterns.
<?php
namespace Drupal\my_module\Plugin\FactExtractor;
use Drupal\ai_autoevals\Plugin\FactExtractor\FactExtractorPluginBase;use Drupal\Core\Form\FormStateInterface;use Drupal\Core\Plugin\PluginFormInterface;
/** * Configurable fact extractor plugin. * * @FactExtractor( * id = "configurable_extractor", * label = @Translation("Configurable Extractor"), * description = @Translation("Extracts facts based on configurable patterns."), * weight = 30 * ) */class ConfigurableExtractor extends FactExtractorPluginBase implements PluginFormInterface {
/**
 * {@inheritdoc}
 *
 * Ships two starter patterns: numeric values with a unit, and ISO-style
 * dates. Each pattern pairs a regex with a fact template in which {match}
 * is replaced by the matched text.
 */
public function defaultConfiguration(): array {
  $patterns = [];

  // Numbers followed by a percentage or duration unit.
  $patterns['numerical_values'] = [
    'regex' => '/\b\d+(?:\.\d+)?\s*(?:%|percent|seconds|minutes|hours|days)\b/i',
    'fact_template' => 'The answer should accurately reference: {match}',
  ];

  // Dates written as YYYY-MM-DD.
  $patterns['dates'] = [
    'regex' => '/\b\d{4}-\d{2}-\d{2}\b/',
    'fact_template' => 'The answer should include the date: {match}',
  ];

  return ['patterns' => $patterns];
}
/**
 * {@inheritdoc}
 *
 * Applies every configured pattern to the input and emits one fact per
 * match, built by substituting the match into the pattern's fact template.
 *
 * @param string $input
 *   The question to derive facts from.
 * @param array $context
 *   Additional context; not used by this extractor.
 *
 * @return string[]
 *   Evaluation criteria. Empty when no pattern matches or none is usable.
 */
public function extract(string $input, array $context = []): array {
  $facts = [];
  $patterns = $this->getConfiguration()['patterns'] ?? [];

  // Patterns are entered by an administrator as JSON, so do not trust their
  // shape: a missing key would otherwise trigger warnings or a TypeError.
  if (!is_array($patterns)) {
    return $facts;
  }

  foreach ($patterns as $pattern) {
    if (!is_array($pattern)) {
      continue;
    }

    $regex = $pattern['regex'] ?? NULL;
    $template = $pattern['fact_template'] ?? NULL;
    if (!is_string($regex) || $regex === '' || !is_string($template)) {
      // Skip incomplete entries instead of failing the whole extraction.
      continue;
    }

    // preg_match_all() returns 0 for no match and FALSE on error; both are
    // treated as "nothing to extract".
    if (preg_match_all($regex, $input, $matches)) {
      foreach ($matches[0] as $match) {
        $facts[] = str_replace('{match}', $match, $template);
      }
    }
  }

  return $facts;
}
/**
 * {@inheritdoc}
 *
 * Exposes the pattern map as a JSON textarea, plus a collapsed example.
 */
public function buildConfigurationForm(array $form, FormStateInterface $form_state): array {
  $patterns = $this->getConfiguration()['patterns'] ?? [];

  // Regexes are full of slashes; leaving them unescaped keeps the JSON
  // readable. json_decode() accepts both forms.
  $json_flags = JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES;

  $form['patterns'] = [
    '#type' => 'textarea',
    '#title' => $this->t('Patterns (JSON)'),
    '#default_value' => json_encode($patterns, $json_flags),
    '#description' => $this->t('JSON array of patterns with regex and fact_template fields.'),
    '#rows' => 10,
  ];

  // The example must have the same shape as the textarea value, i.e. the
  // pattern map itself. Wrapping it in an outer "patterns" key would be
  // stored as a nested map that extract() cannot use.
  $example = [
    'numerical' => [
      'regex' => '/\b\d+\s*(?:seconds|minutes|hours)\b/i',
      'fact_template' => 'The answer should reference: {match}',
    ],
  ];

  $form['example'] = [
    '#type' => 'details',
    '#title' => $this->t('Example Pattern'),
    '#open' => FALSE,
    'code' => [
      '#markup' => '<pre>' . json_encode($example, $json_flags) . '</pre>',
    ],
  ];

  return $form;
}
/**
 * {@inheritdoc}
 *
 * Reports malformed JSON, incomplete pattern entries and regexes that do not
 * compile as form errors, so the administrator can correct them before the
 * form is submitted.
 */
public function validateConfigurationForm(array &$form, FormStateInterface $form_state): void {
  $patterns = json_decode((string) $form_state->getValue('patterns'), TRUE);

  if (!is_array($patterns)) {
    $form_state->setError($form['patterns'], $this->t('Patterns must be valid JSON.'));
    return;
  }

  foreach ($patterns as $name => $pattern) {
    $regex = is_array($pattern) ? ($pattern['regex'] ?? NULL) : NULL;
    $template = is_array($pattern) ? ($pattern['fact_template'] ?? NULL) : NULL;

    if (!is_string($regex) || !is_string($template)) {
      $form_state->setError($form['patterns'], $this->t('Pattern "@name" must define string "regex" and "fact_template" values.', ['@name' => $name]));
      continue;
    }

    if (!$this->isValidRegex($regex)) {
      $form_state->setError($form['patterns'], $this->t('Pattern "@name" does not contain a valid regular expression.', ['@name' => $name]));
    }
  }
}

/**
 * {@inheritdoc}
 *
 * Stores the decoded pattern map as the plugin configuration.
 *
 * @throws \InvalidArgumentException
 *   When the submitted value does not decode to an array. Validation should
 *   already have caught this; the check guards direct programmatic calls.
 */
public function submitConfigurationForm(array &$form, FormStateInterface $form_state): void {
  $patterns = json_decode((string) $form_state->getValue('patterns'), TRUE);

  if (!is_array($patterns)) {
    throw new \InvalidArgumentException('Patterns must be valid JSON array');
  }

  $this->setConfiguration(['patterns' => $patterns]);
}

/**
 * Checks whether a string compiles as a PCRE pattern.
 *
 * @param string $regex
 *   The pattern, including delimiters and modifiers.
 *
 * @return bool
 *   TRUE when preg_match() accepts the pattern.
 */
private function isValidRegex(string $regex): bool {
  // preg_match() signals a bad pattern with a warning plus a FALSE return.
  // Swallow the warning locally so that only the return value matters.
  set_error_handler(static function (): bool {
    return TRUE;
  });
  try {
    return preg_match($regex, '') !== FALSE;
  }
  finally {
    restore_error_handler();
  }
}
/**
 * {@inheritdoc}
 */
public function isAvailable(): bool {
  // Unconditionally available: this method performs no checks of its own.
  return TRUE;
}
}
Example 5: Hybrid Extractor with Fallback
Combine AI extraction with rule-based fallback.
<?php
namespace Drupal\my_module\Plugin\FactExtractor;
use Drupal\ai_autoevals\Plugin\FactExtractor\FactExtractorPluginBase;
/** * Hybrid fact extractor with fallback. * * @FactExtractor( * id = "hybrid_fallback_extractor", * label = @Translation("Hybrid Fallback Extractor"), * description = @Translation("Uses AI extraction with rule-based fallback."), * weight = 25 * ) */class HybridFallbackExtractor extends FactExtractorPluginBase {
/**
 * {@inheritdoc}
 *
 * Asks the AI extractor first. If it throws, the rule-based extraction is
 * used instead. If the chosen strategy yields nothing, generic default
 * facts are returned so the result is never empty.
 */
public function extract(string $input, array $context = [], ?EvaluationSetInterface $evaluationSet = null): array {
  try {
    // Preferred path: delegate to the AI extractor.
    $facts = $this->getAiExtractor()->extract($input, $context, $evaluationSet);
  }
  catch (\Exception $e) {
    // The AI path failed; fall back to keyword rules.
    $facts = $this->ruleBasedExtraction($input);
  }

  // Guarantee at least some facts.
  return empty($facts) ? $this->getDefaultFacts($input) : $facts;
}
/**
 * Creates the "ai_generated" extractor via the fact extractor plugin manager.
 *
 * @return object
 *   The instantiated extractor plugin.
 */
protected function getAiExtractor(): object {
  return \Drupal::service('plugin.manager.ai_autoevals.fact_extractor')
    ->createInstance('ai_generated');
}
/**
 * Rule-based extraction fallback.
 *
 * Emits one fact for each interrogative word found in the input.
 *
 * @param string $input
 *   The question to inspect.
 *
 * @return string[]
 *   One fact per interrogative present, in the order of the keyword list.
 */
protected function ruleBasedExtraction(string $input): array {
  $facts = [];

  // Match whole words only. A plain substring search reports "how" for
  // "show", "who" for "whole" and "where" for "somewhere".
  $keywords = ['how', 'what', 'why', 'when', 'where', 'who', 'which'];
  foreach ($keywords as $keyword) {
    if (preg_match('/\b' . $keyword . '\b/i', $input)) {
      $facts[] = "The answer should address the $keyword question";
    }
  }

  return $facts;
}
/**
 * Supplies generic facts for when no other strategy produced any.
 *
 * @param string $input
 *   The question; currently unused because the defaults are generic.
 *
 * @return string[]
 *   Three general-purpose evaluation criteria.
 */
protected function getDefaultFacts(string $input): array {
  $defaults = [];
  $defaults[] = "The answer should address the user's question accurately";
  $defaults[] = 'The answer should be relevant and helpful';
  $defaults[] = 'The answer should be clear and concise';

  return $defaults;
}
/**
 * {@inheritdoc}
 */
public function isAvailable(): bool {
  // Unconditionally available: AI extractor failures are handled inside
  // extract(), which falls back to keyword rules and default facts.
  return TRUE;
}
}
Using Custom Extractors
1. Create Plugin File
Place your plugin file in the src/Plugin/FactExtractor/ directory of your module.
2. Clear Cache
Clear the plugin cache so that your plugin is discovered:
drush cache:rebuild
3. Use in Evaluation Set
- Navigate to /admin/content/ai-autoevals/sets
- Create or edit an evaluation set
- Select your custom extractor from the fact extraction method dropdown
- Save the configuration
4. Test Your Extractor
<?php
// Test your extractor. 'product_extractor' is the plugin from Example 1;
// replace it with the ID of your own plugin and adjust the test input.
$plugin_manager = \Drupal::service('plugin.manager.ai_autoevals.fact_extractor');
$extractor = $plugin_manager->createInstance('product_extractor');

$testInput = 'What are the specifications of SuperWidget?';
$facts = $extractor->extract($testInput);

// For the product extractor this prints one fact about SuperWidget and one
// about specifications and features.
foreach ($facts as $fact) {
  print $fact . "\n";
}
Best Practices
1. Keep Extractors Fast
Avoid slow operations in your extractors. Use simple regex patterns instead of complex algorithms.
2. Provide Specific Facts
Generate clear, specific evaluation criteria:
// Good
$facts[] = "The answer should state that SuperWidget Pro has 8 hours of battery life";
// Bad
$facts[] = "The answer should mention battery";
3. Handle Edge Cases
Handle various input formats, such as empty or whitespace-only input:
/**
 * {@inheritdoc}
 *
 * Returns no facts for empty or whitespace-only input.
 */
public function extract(string $input, array $context = []): array {
  // Trim once and compare against '' explicitly: empty() also treats the
  // string "0" as empty, which would discard a legitimate input.
  $input = trim($input);
  if ($input === '') {
    return [];
  }

  // Extract facts
  // ...
}
4. Use Custom Knowledge
Leverage custom knowledge from evaluation sets:
if ($evaluationSet && $evaluationSet->hasCustomKnowledge()) {
  $knowledge = $evaluationSet->getCustomKnowledge();
  // Use knowledge to enhance extraction
}
5. Document Your Extractor
Add clear documentation to help others understand and use your extractor:
/**
 * Product-specific fact extractor.
 *
 * This extractor generates evaluation criteria for product-related questions.
 * It focuses on product specifications, features, pricing, and availability.
 *
 * Custom knowledge should include:
 * - Product names and descriptions
 * - Specifications and features
 * - Pricing information
 * - Availability status
 *
 * @FactExtractor(
 *   id = "product_extractor",
 *   label = @Translation("Product Extractor"),
 *   description = @Translation("Extracts product-related facts."),
 *   weight = 10
 * )
 */
Next Steps
- Plugin Development - Plugin development guide
- Content Moderation - Content moderation workflow
- Event System - Event system guide