Skip to content
Secure Private AI for Enterprises and Developers - amazee.ai

Custom Fact Extractors

This example shows how to create custom fact extractor plugins for specialized evaluation needs.

Custom fact extractors allow you to:

  1. Domain-Specific Extraction: Extract facts for your specific domain
  2. Rule-Based Extraction: Use domain rules and patterns
  3. Hybrid Approaches: Combine multiple extraction methods
  4. Optimized Performance: Tailor extraction for your use case

Create a fact extractor for product-related questions.

<?php
namespace Drupal\my_module\Plugin\FactExtractor;
use Drupal\ai_autoevals\Plugin\FactExtractor\FactExtractorPluginBase;
/**
* Product fact extractor.
*
* @FactExtractor(
* id = "product_extractor",
* label = @Translation("Product Extractor"),
* description = @Translation("Extracts product-related facts."),
* weight = 10
* )
*/
class ProductFactExtractor extends FactExtractorPluginBase {

  /**
   * Builds evaluation facts from product-related cues in the question.
   *
   * Each rule is independent: a known product name, or a keyword about
   * specifications, pricing or availability, contributes its own fact.
   *
   * @param string $input
   *   The question text to scan.
   * @param array $context
   *   Additional context; not used by this extractor.
   *
   * @return string[]
   *   The facts an answer should satisfy, in rule order. Empty when no rule
   *   matches.
   */
  public function extract(string $input, array $context = []): array {
    $facts = [];

    // Product names. The pattern is case-insensitive, so de-duplicate
    // case-insensitively too: "superwidget" and "SuperWidget" are the same
    // product and must yield one fact (array_unique() alone would keep both).
    // The first spelling found in the input is the one reported.
    if (preg_match_all('/\b(SuperWidget|TechGadget|ProTool)\b/i', $input, $matches)) {
      $seen = [];
      foreach ($matches[0] as $product) {
        $key = strtolower($product);
        if (isset($seen[$key])) {
          continue;
        }
        $seen[$key] = TRUE;
        $facts[] = "The answer should accurately describe $product";
      }
    }

    // Specifications and features.
    if (preg_match('/specification|specs?|feature|capability/i', $input)) {
      $facts[] = "The answer should include product specifications and features";
    }

    // Pricing.
    if (preg_match('/price|cost|expensive|cheap|budget/i', $input)) {
      $facts[] = "The answer should include accurate pricing information";
    }

    // Availability and shipping.
    if (preg_match('/available|in stock|out of stock|ship|delivery/i', $input)) {
      $facts[] = "The answer should address product availability and shipping";
    }

    return $facts;
  }

  /**
   * Reports this extractor as usable.
   *
   * @return bool
   *   Always TRUE.
   */
  public function isAvailable(): bool {
    return TRUE;
  }

}

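Enhance the product extractor with custom knowledge by replacing its extract() method with the version below (add a `use` statement for EvaluationSetInterface at the top of the file):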

public function extract(string $input, array $context = [], ?EvaluationSetInterface $evaluationSet = null): array {
  // Start from whatever the parent extractor produces.
  $facts = parent::extract($input, $context, $evaluationSet);

  // Without an evaluation set that carries custom knowledge there is nothing
  // to add.
  if (!$evaluationSet || !$evaluationSet->hasCustomKnowledge()) {
    return $facts;
  }

  $knowledge = $evaluationSet->getCustomKnowledge();

  // Add a consistency fact for each product that is named both in the
  // question and in the knowledge text.
  foreach (['SuperWidget', 'TechGadget', 'ProTool'] as $product) {
    if (stripos($input, $product) === FALSE || stripos($knowledge, $product) === FALSE) {
      continue;
    }
    $facts[] = "The answer should be consistent with the product knowledge base for $product";
  }

  return $facts;
}

Create a fact extractor for medical content with strict requirements.

<?php
namespace Drupal\medical_ai\Plugin\FactExtractor;
use Drupal\ai_autoevals\Plugin\FactExtractor\FactExtractorPluginBase;
/**
* Medical content fact extractor.
*
* @FactExtractor(
* id = "medical_extractor",
* label = @Translation("Medical Content Extractor"),
* description = @Translation("Extracts medical content facts with safety requirements."),
* weight = 20
* )
*/
class MedicalFactExtractor extends FactExtractorPluginBase {

  /**
   * Builds evaluation facts for medical questions.
   *
   * A disclaimer fact is always produced; citation, per-term and safety facts
   * are added when the question contains the corresponding keywords.
   *
   * @param string $input
   *   The question text to scan.
   * @param array $context
   *   Additional context; not used by this extractor.
   *
   * @return string[]
   *   The facts an answer should satisfy. Never empty, because the disclaimer
   *   fact is unconditional.
   */
  public function extract(string $input, array $context = []): array {
    $facts = [];

    // Medical claims must be sourced and reflect consensus.
    if (preg_match('/medical|health|treatment|diagnosis|symptom|drug|medication/i', $input)) {
      $facts[] = "The answer should include citations or references to medical sources";
      $facts[] = "The answer should be accurate and based on medical consensus";
    }

    // A disclaimer is required for every answer, whatever the question says.
    $facts[] = "The answer should include appropriate medical disclaimers";

    // Specific medical terms. The pattern is case-insensitive, so de-duplicate
    // case-insensitively too: "Diabetes" and "diabetes" must yield one fact
    // (array_unique() alone would keep both). The first spelling found in the
    // input is the one reported.
    if (preg_match_all('/\b(vaccine|antibiotic|diabetes|hypertension|cancer|depression)\b/i', $input, $matches)) {
      $seen = [];
      foreach ($matches[0] as $term) {
        $key = strtolower($term);
        if (isset($seen[$key])) {
          continue;
        }
        $seen[$key] = TRUE;
        $facts[] = "The answer should provide accurate medical information about $term";
      }
    }

    // Safety warnings.
    if (preg_match('/side effect|danger|risk|warning|caution/i', $input)) {
      $facts[] = "The answer should include relevant safety warnings and side effects";
    }

    return $facts;
  }

  /**
   * Reports this extractor as usable.
   *
   * @return bool
   *   Always TRUE.
   */
  public function isAvailable(): bool {
    return TRUE;
  }

}

Example 3: Technical Documentation Extractor

Section titled “Example 3: Technical Documentation Extractor”

Create an extractor for technical documentation.

<?php
namespace Drupal\technical_docs\Plugin\FactExtractor;
use Drupal\ai_autoevals\Plugin\FactExtractor\FactExtractorPluginBase;
/**
* Technical documentation fact extractor.
*
* @FactExtractor(
* id = "technical_docs_extractor",
* label = @Translation("Technical Documentation Extractor"),
* description = @Translation("Extracts technical documentation facts."),
* weight = 15
* )
*/
class TechnicalDocsFactExtractor extends FactExtractorPluginBase {

  /**
   * Builds evaluation facts for technical-documentation questions.
   *
   * @param string $input
   *   The question text to scan.
   * @param array $context
   *   Additional context; not used by this extractor.
   *
   * @return string[]
   *   The facts an answer should satisfy, in rule order. Empty when no rule
   *   matches.
   */
  public function extract(string $input, array $context = []): array {
    $facts = [];

    // API calls: an HTTP verb followed by an /api path. Capture the whole
    // path rather than only the "/api" prefix so the fact names the actual
    // endpoint (e.g. "GET /api/users/{id}"), and report each distinct call
    // once even if the question repeats it.
    if (preg_match_all('/\b(?:get|post|put|delete|patch)\s*\/api(?:\/[\w\-.{}:]+)*/i', $input, $matches)) {
      $calls = [];
      foreach ($matches[0] as $call) {
        // Drop sentence punctuation the path pattern may have swallowed.
        $calls[] = rtrim($call, '.:');
      }
      foreach (array_unique($calls) as $call) {
        $facts[] = "The answer should accurately describe the $call endpoint";
      }
    }

    // Parameters.
    if (preg_match('/parameter|argument|query string|body|header/i', $input)) {
      $facts[] = "The answer should include all required parameters";
      $facts[] = "The answer should describe parameter types and formats";
    }

    // Code examples.
    if (preg_match('/code|example|implementation|sample/i', $input)) {
      $facts[] = "The answer should include working code examples";
      $facts[] = "Code examples should be syntactically correct";
    }

    // Version information: the word "version", "v1.2"-style tags, or a
    // three-part number such as "10.2.1".
    if (preg_match('/version|v\d+\.\d+|\d+\.\d+\.\d+/i', $input)) {
      $facts[] = "The answer should be specific to the correct version";
    }

    // Error handling.
    if (preg_match('/error|exception|fail|status code/i', $input)) {
      $facts[] = "The answer should include error handling information";
    }

    return $facts;
  }

  /**
   * Reports this extractor as usable.
   *
   * @return bool
   *   Always TRUE.
   */
  public function isAvailable(): bool {
    return TRUE;
  }

}

Create an extractor with configurable patterns.

<?php
namespace Drupal\my_module\Plugin\FactExtractor;
use Drupal\ai_autoevals\Plugin\FactExtractor\FactExtractorPluginBase;
use Drupal\Core\Form\FormStateInterface;
use Drupal\Core\Plugin\PluginFormInterface;
/**
* Configurable fact extractor plugin.
*
* @FactExtractor(
* id = "configurable_extractor",
* label = @Translation("Configurable Extractor"),
* description = @Translation("Extracts facts based on configurable patterns."),
* weight = 30
* )
*/
class ConfigurableExtractor extends FactExtractorPluginBase implements PluginFormInterface {

  /**
   * Supplies the pattern set used until the plugin is configured.
   *
   * @return array
   *   Configuration with a 'patterns' map. Each entry has a 'regex' and a
   *   'fact_template' in which '{match}' is replaced by the matched text.
   */
  public function defaultConfiguration(): array {
    return [
      'patterns' => [
        'numerical_values' => [
          // The word boundary applies to the unit words only: "\b" directly
          // after "%" would require a word character to follow, so "50% of"
          // would never match.
          'regex' => '/\b\d+(?:\.\d+)?\s*(?:%|(?:percent|seconds|minutes|hours|days)\b)/i',
          'fact_template' => 'The answer should accurately reference: {match}',
        ],
        'dates' => [
          'regex' => '/\b\d{4}-\d{2}-\d{2}\b/',
          'fact_template' => 'The answer should include the date: {match}',
        ],
      ],
    ];
  }

  /**
   * Produces one fact per match of each configured pattern.
   *
   * Entries that are malformed (missing keys, non-string values, or a regex
   * that does not compile) are skipped instead of raising PHP warnings.
   *
   * @param string $input
   *   The question text to scan.
   * @param array $context
   *   Additional context; not used by this extractor.
   *
   * @return string[]
   *   Facts built from each pattern's template, in configuration order.
   */
  public function extract(string $input, array $context = []): array {
    $facts = [];
    $patterns = $this->getConfiguration()['patterns'] ?? [];
    if (!is_array($patterns)) {
      return $facts;
    }

    foreach ($patterns as $pattern) {
      if (!$this->isUsablePattern($pattern)) {
        continue;
      }
      if (!preg_match_all($pattern['regex'], $input, $matches)) {
        continue;
      }
      foreach ($matches[0] as $match) {
        $facts[] = str_replace('{match}', $match, $pattern['fact_template']);
      }
    }

    return $facts;
  }

  /**
   * Builds the form: a JSON textarea for the patterns plus a worked example.
   *
   * @param array $form
   *   The form array to extend.
   * @param \Drupal\Core\Form\FormStateInterface $form_state
   *   The current form state; not read here.
   *
   * @return array
   *   The form array with the 'patterns' and 'example' elements added.
   */
  public function buildConfigurationForm(array $form, FormStateInterface $form_state): array {
    $config = $this->getConfiguration();
    $patterns = $config['patterns'] ?? [];

    $form['patterns'] = [
      '#type' => 'textarea',
      '#title' => $this->t('Patterns (JSON)'),
      '#default_value' => json_encode($patterns, JSON_PRETTY_PRINT),
      '#description' => $this->t('JSON array of patterns with regex and fact_template fields.'),
      '#rows' => 10,
    ];

    // The example mirrors exactly what the textarea expects: the pattern map
    // itself. submitConfigurationForm() adds the outer 'patterns' key, so
    // showing that key here would lead users to nest it twice.
    $form['example'] = [
      '#type' => 'details',
      '#title' => $this->t('Example Pattern'),
      '#open' => FALSE,
      'code' => [
        '#markup' => '<pre>' . json_encode([
          'numerical' => [
            'regex' => '/\b\d+\s*(?:seconds|minutes|hours)\b/i',
            'fact_template' => 'The answer should reference: {match}',
          ],
        ], JSON_PRETTY_PRINT) . '</pre>',
      ],
    ];

    return $form;
  }

  /**
   * Rejects invalid pattern JSON with a form error before submission.
   *
   * PluginFormInterface declares this method, and reporting problems here
   * lets the user correct the field instead of hitting the exception thrown
   * by submitConfigurationForm().
   *
   * @param array $form
   *   The form array.
   * @param \Drupal\Core\Form\FormStateInterface $form_state
   *   The form state holding the submitted 'patterns' value.
   */
  public function validateConfigurationForm(array &$form, FormStateInterface $form_state): void {
    $raw = $form_state->getValue('patterns');
    $patterns = is_string($raw) ? json_decode($raw, TRUE) : NULL;

    $valid = is_array($patterns);
    if ($valid) {
      foreach ($patterns as $pattern) {
        if (!$this->isUsablePattern($pattern)) {
          $valid = FALSE;
          break;
        }
      }
    }

    if (!$valid) {
      $form_state->setErrorByName('patterns', $this->t('Patterns must be valid JSON in which every entry has a "regex" that compiles and a "fact_template" string.'));
    }
  }

  /**
   * Stores the submitted patterns as the plugin configuration.
   *
   * @param array $form
   *   The form array.
   * @param \Drupal\Core\Form\FormStateInterface $form_state
   *   The form state holding the submitted 'patterns' JSON.
   *
   * @throws \InvalidArgumentException
   *   When the submitted value does not decode to an array. Kept as a last
   *   line of defence for callers that submit without validating.
   */
  public function submitConfigurationForm(array &$form, FormStateInterface $form_state): void {
    $patterns = json_decode($form_state->getValue('patterns'), TRUE);
    if (!is_array($patterns)) {
      throw new \InvalidArgumentException('Patterns must be valid JSON array');
    }
    $this->setConfiguration(['patterns' => $patterns]);
  }

  /**
   * Reports this extractor as usable.
   *
   * @return bool
   *   Always TRUE.
   */
  public function isAvailable(): bool {
    return TRUE;
  }

  /**
   * Checks that a pattern entry has string fields and a compilable regex.
   *
   * @param mixed $pattern
   *   One entry of the 'patterns' configuration.
   *
   * @return bool
   *   TRUE when the entry can be used by extract() without warnings.
   */
  private function isUsablePattern($pattern): bool {
    return is_array($pattern)
      && is_string($pattern['regex'] ?? NULL)
      && is_string($pattern['fact_template'] ?? NULL)
      && $this->isValidRegex($pattern['regex']);
  }

  /**
   * Checks that a regular expression compiles.
   *
   * @param string $regex
   *   The delimited PCRE pattern to test.
   *
   * @return bool
   *   TRUE when preg_match() accepts the pattern.
   */
  private function isValidRegex(string $regex): bool {
    // preg_match() signals a malformed pattern with a warning plus FALSE.
    // Swallow the warning locally so only the return value decides.
    set_error_handler(static function (): bool {
      return TRUE;
    });
    try {
      return preg_match($regex, '') !== FALSE;
    }
    finally {
      restore_error_handler();
    }
  }

}

Combine AI extraction with rule-based fallback.

<?php
namespace Drupal\my_module\Plugin\FactExtractor;
use Drupal\ai_autoevals\Plugin\FactExtractor\FactExtractorPluginBase;
/**
* Hybrid fact extractor with fallback.
*
* @FactExtractor(
* id = "hybrid_fallback_extractor",
* label = @Translation("Hybrid Fallback Extractor"),
* description = @Translation("Uses AI extraction with rule-based fallback."),
* weight = 25
* )
*/
class HybridFallbackExtractor extends FactExtractorPluginBase {

  /**
   * Extracts facts with the AI plugin, falling back to rules and defaults.
   *
   * The rule-based fallback runs only when the AI extractor throws; the
   * generic defaults are used whenever the chosen path yields no facts.
   *
   * NOTE(review): EvaluationSetInterface is not imported in the visible
   * snippet. Confirm a matching `use` statement exists, otherwise the type
   * resolves inside this class's own namespace.
   *
   * @param string $input
   *   The question text.
   * @param array $context
   *   Additional context, passed through to the AI extractor.
   * @param \EvaluationSetInterface|null $evaluationSet
   *   Optional evaluation set, passed through to the AI extractor.
   *
   * @return string[]
   *   A non-empty list of facts.
   */
  public function extract(string $input, array $context = [], ?EvaluationSetInterface $evaluationSet = null): array {
    try {
      $facts = $this->getAiExtractor()->extract($input, $context, $evaluationSet);
    }
    catch (\Exception $e) {
      // Best effort by design: any failure of the AI path degrades to the
      // rule-based extractor instead of failing the evaluation.
      $facts = $this->ruleBasedExtraction($input);
    }

    // Ensure we have at least some facts.
    if (empty($facts)) {
      $facts = $this->getDefaultFacts($input);
    }

    return $facts;
  }

  /**
   * Gets the AI extractor instance.
   *
   * @return object
   *   The 'ai_generated' plugin created by the fact extractor plugin manager.
   */
  protected function getAiExtractor(): object {
    $plugin_manager = \Drupal::service('plugin.manager.ai_autoevals.fact_extractor');
    return $plugin_manager->createInstance('ai_generated');
  }

  /**
   * Rule-based extraction fallback keyed on question words.
   *
   * @param string $input
   *   The question text.
   *
   * @return string[]
   *   One fact per question word present in the input, in keyword order.
   */
  protected function ruleBasedExtraction(string $input): array {
    $facts = [];
    $keywords = ['how', 'what', 'why', 'when', 'where', 'who', 'which'];

    foreach ($keywords as $keyword) {
      // Match whole words only. A plain substring search would report "how"
      // for "show", "who" for "whole" and "what" for "somewhat".
      if (preg_match('/\b' . preg_quote($keyword, '/') . '\b/i', $input)) {
        $facts[] = "The answer should address the $keyword question";
      }
    }

    return $facts;
  }

  /**
   * Gets default facts when extraction fails.
   *
   * @param string $input
   *   The question text; not used by this implementation.
   *
   * @return string[]
   *   Three generic quality criteria.
   */
  protected function getDefaultFacts(string $input): array {
    return [
      'The answer should address the user\'s question accurately',
      'The answer should be relevant and helpful',
      'The answer should be clear and concise',
    ];
  }

  /**
   * Reports this extractor as usable.
   *
   * @return bool
   *   Always TRUE.
   */
  public function isAvailable(): bool {
    return TRUE;
  }

}

Place your plugin file in the src/Plugin/FactExtractor/ directory of your module.

Clear plugin cache to discover your plugin:

Terminal window
drush cache:rebuild
  1. Navigate to /admin/content/ai-autoevals/sets
  2. Create or edit an evaluation set
  3. Select your custom extractor from the fact extraction method dropdown
  4. Save the configuration
<?php
// Smoke-test the extractor: resolve the plugin manager, create the plugin by
// its ID, run it on a sample question, and print one fact per line.
$question = 'What are the specifications of SuperWidget?';
$manager = \Drupal::service('plugin.manager.ai_autoevals.fact_extractor');
$plugin = $manager->createInstance('my_custom_extractor');
$results = $plugin->extract($question);
foreach ($results as $line) {
  print $line . "\n";
}

Avoid slow operations in your extractors. Use simple regex patterns instead of complex algorithms.

Generate clear, specific evaluation criteria:

// Good: names the product and the exact value the answer must state.
$facts[] = "The answer should state that SuperWidget Pro has 8 hours of battery life";
// Bad: vague; it does not say what about the battery must be stated.
$facts[] = "The answer should mention battery";

Handle various input formats, starting with empty or whitespace-only input:

/**
 * Skeleton showing input normalization before extraction.
 *
 * @param string $input
 *   The question text; surrounding whitespace is ignored.
 * @param array $context
 *   Additional context; not used in this skeleton.
 *
 * @return string[]
 *   The extracted facts, or an empty array for blank input.
 */
public function extract(string $input, array $context = []): array {
  // Trim once, then compare against '' explicitly. empty() would also treat
  // the literal input "0" as empty and silently discard it.
  $input = trim($input);
  if ($input === '') {
    return [];
  }

  $facts = [];
  // Extract facts
  // ...

  // The declared array return type requires an explicit return on this path.
  return $facts;
}

Leverage custom knowledge from evaluation sets:

// Custom knowledge is optional. The first operand skips both calls when
// $evaluationSet is falsy (e.g. NULL), and hasCustomKnowledge() is consulted
// before getCustomKnowledge() is called.
if ($evaluationSet && $evaluationSet->hasCustomKnowledge()) {
$knowledge = $evaluationSet->getCustomKnowledge();
// Use knowledge to enhance extraction
}

Add clear documentation to help others understand and use your extractor:

/**
* Product-specific fact extractor.
*
* This extractor generates evaluation criteria for product-related questions.
* It focuses on product specifications, features, pricing, and availability.
*
* Custom knowledge should include:
* - Product names and descriptions
* - Specifications and features
* - Pricing information
* - Availability status
*
* @FactExtractor(
* id = "product_extractor",
* label = @Translation("Product Extractor"),
* description = @Translation("Extracts product-related facts."),
* weight = 10
* )
*/