science_live.pipeline.common

Contents

science_live.pipeline.common#

Science Live Pipeline: Common Data Models#

Shared data structures used by every step of the Science Live processing pipeline. All pipeline steps import from this module to ensure consistent data flow.

Author: Science Live Team Version: 1.0.0

Module Contents#

Classes#

EntityType

Types of entities that can be extracted

QuestionType

Types of questions

ConfidenceLevel

Confidence levels for processing results

ProcessingContext

Context passed through the entire pipeline

ProcessedQuestion

Output of question processing step

ExtractedEntity

An entity extracted from the question

LinkedEntities

Output of entity extraction and linking

RosettaStatement

A structured Rosetta statement

GeneratedStatements

Output of Rosetta statement generation

SPARQLQuery

Generated SPARQL query

GeneratedQueries

Output of SPARQL generation

QueryResults

Raw results from SPARQL execution

StructuredResult

A single structured result

ProcessedResults

Output of result processing

NaturalLanguageResult

Final natural language output

PipelineStep

Abstract base class for all pipeline steps

Functions#

get_confidence_level

Convert numeric confidence to confidence level enum

merge_metadata

Merge multiple metadata dictionaries

validate_processing_context

Validate processing context

validate_extracted_entity

Validate extracted entity

validate_rosetta_statement

Validate Rosetta statement

validate_sparql_query

Validate SPARQL query

Data#

API#

science_live.pipeline.common.__all__ = ['ProcessingContext', 'ProcessedQuestion', 'ExtractedEntity', 'LinkedEntities', 'RosettaStatement', ...#
class science_live.pipeline.common.EntityType(*args, **kwds)[source]#

Bases: enum.Enum

Types of entities that can be extracted

Initialization

DOI = 'doi'#
ORCID = 'orcid'#
URL = 'url'#
PERSON = 'person'#
CONCEPT = 'concept'#
TITLE = 'title'#
ORGANIZATION = 'organization'#
LOCATION = 'location'#
DATE = 'date'#
NUMBER = 'number'#
UNKNOWN = 'unknown'#
class science_live.pipeline.common.QuestionType(*args, **kwds)[source]#

Bases: enum.Enum

Types of questions

Initialization

WHAT = 'what'#
WHO = 'who'#
WHERE = 'where'#
WHEN = 'when'#
HOW = 'how'#
WHY = 'why'#
LIST = 'list'#
COUNT = 'count'#
GENERAL = 'general'#
class science_live.pipeline.common.ConfidenceLevel(*args, **kwds)[source]#

Bases: enum.Enum

Confidence levels for processing results

Initialization

HIGH = 'high'#
MEDIUM = 'medium'#
LOW = 'low'#
class science_live.pipeline.common.ProcessingContext[source]#

Context passed through the entire pipeline

original_question: str = None#
user_id: Optional[str] = None#
session_id: Optional[str] = None#
preferences: Dict[str, Any] = 'field(...)'#
debug_mode: bool = False#
start_time: float = 'field(...)'#
metadata: Dict[str, Any] = 'field(...)'#
get_elapsed_time() → float[source]#

Get elapsed time since pipeline started

class science_live.pipeline.common.ProcessedQuestion[source]#

Output of question processing step

original_text: str = None#
cleaned_text: str = None#
question_type: science_live.pipeline.common.QuestionType = None#
key_phrases: List[str] = None#
potential_entities: List[str] = None#
intent_confidence: float = None#
language: str = 'en'#
processing_metadata: Dict[str, Any] = 'field(...)'#
class science_live.pipeline.common.ExtractedEntity[source]#

An entity extracted from the question

text: str = None#
entity_type: science_live.pipeline.common.EntityType = None#
confidence: float = None#
start_pos: int = None#
end_pos: int = None#
uri: Optional[str] = None#
label: Optional[str] = None#
aliases: List[str] = 'field(...)'#
metadata: Dict[str, Any] = 'field(...)'#
to_sparql_value() → str[source]#

Convert entity to SPARQL representation

class science_live.pipeline.common.LinkedEntities[source]#

Output of entity extraction and linking

entities: List[science_live.pipeline.common.ExtractedEntity] = None#
subject_candidates: List[science_live.pipeline.common.ExtractedEntity] = None#
object_candidates: List[science_live.pipeline.common.ExtractedEntity] = None#
linking_confidence: float = None#
linking_metadata: Dict[str, Any] = 'field(...)'#
class science_live.pipeline.common.RosettaStatement[source]#

A structured Rosetta statement

subject: science_live.pipeline.common.ExtractedEntity = None#
statement_type_uri: str = None#
statement_type_label: str = None#
required_object1: Optional[science_live.pipeline.common.ExtractedEntity] = None#
optional_object1: Optional[science_live.pipeline.common.ExtractedEntity] = None#
optional_object2: Optional[science_live.pipeline.common.ExtractedEntity] = None#
optional_object3: Optional[science_live.pipeline.common.ExtractedEntity] = None#
dynamic_label_template: Optional[str] = None#
confidence_level: Optional[float] = None#
context: Optional[str] = None#
is_negation: bool = False#
source_references: List[str] = 'field(...)'#
generation_metadata: Dict[str, Any] = 'field(...)'#
to_natural_language() → str[source]#

Convert back to natural language using dynamic label template

class science_live.pipeline.common.GeneratedStatements[source]#

Output of Rosetta statement generation

statements: List[science_live.pipeline.common.RosettaStatement] = None#
generation_confidence: float = None#
alternative_interpretations: List[science_live.pipeline.common.RosettaStatement] = 'field(...)'#
generation_metadata: Dict[str, Any] = 'field(...)'#
class science_live.pipeline.common.SPARQLQuery[source]#

Generated SPARQL query

query_text: str = None#
query_type: str = None#
estimated_complexity: int = None#
timeout_seconds: int = 30#
metadata: Dict[str, Any] = 'field(...)'#
class science_live.pipeline.common.GeneratedQueries[source]#

Output of SPARQL generation

primary_query: science_live.pipeline.common.SPARQLQuery = None#
fallback_queries: List[science_live.pipeline.common.SPARQLQuery] = 'field(...)'#
generation_method: str = 'rosetta_template'#
generation_metadata: Dict[str, Any] = 'field(...)'#
class science_live.pipeline.common.QueryResults[source]#

Raw results from SPARQL execution

success: bool = None#
results: List[Dict[str, Any]] = None#
query_used: str = None#
execution_time: float = None#
total_results: int = None#
error_message: Optional[str] = None#
execution_metadata: Dict[str, Any] = 'field(...)'#
class science_live.pipeline.common.StructuredResult[source]#

A single structured result

nanopub_uri: str = None#
statement_uri: Optional[str] = None#
rosetta_statement: Optional[science_live.pipeline.common.RosettaStatement] = None#
confidence: float = 1.0#
metadata: Dict[str, Any] = 'field(...)'#
raw_data: Dict[str, Any] = 'field(...)'#
class science_live.pipeline.common.ProcessedResults[source]#

Output of result processing

results: List[science_live.pipeline.common.StructuredResult] = None#
total_found: int = None#
processing_confidence: float = None#
groupings: Dict[str, List[science_live.pipeline.common.StructuredResult]] = 'field(...)'#
processing_metadata: Dict[str, Any] = 'field(...)'#
class science_live.pipeline.common.NaturalLanguageResult[source]#

Final natural language output

summary: str = None#
detailed_results: List[str] = None#
confidence_explanation: str = None#
suggestions: List[str] = None#
execution_summary: Dict[str, Any] = None#
generation_metadata: Dict[str, Any] = 'field(...)'#
science_live.pipeline.common.get_confidence_level(confidence: float) → science_live.pipeline.common.ConfidenceLevel[source]#

Convert numeric confidence to confidence level enum

science_live.pipeline.common.merge_metadata(*metadata_dicts: Dict[str, Any]) → Dict[str, Any][source]#

Merge multiple metadata dictionaries

class science_live.pipeline.common.PipelineStep(config: Dict[str, Any] = None)[source]#

Bases: abc.ABC

Abstract base class for all pipeline steps

Initialization

abstractmethod async process(input_data: Any, context: science_live.pipeline.common.ProcessingContext) → Any[source]#

Process input data and return output for next step

get_step_metadata() → Dict[str, Any][source]#

Get metadata about this pipeline step

science_live.pipeline.common.validate_processing_context(context: science_live.pipeline.common.ProcessingContext) → bool[source]#

Validate processing context

science_live.pipeline.common.validate_extracted_entity(entity: science_live.pipeline.common.ExtractedEntity) → bool[source]#

Validate extracted entity

science_live.pipeline.common.validate_rosetta_statement(statement: science_live.pipeline.common.RosettaStatement) → bool[source]#

Validate Rosetta statement

science_live.pipeline.common.validate_sparql_query(query: science_live.pipeline.common.SPARQLQuery) → bool[source]#

Validate SPARQL query

science_live.pipeline.common.__version__ = '1.0.0'#
science_live.pipeline.common.__author__ = 'Science Live Team'#
science_live.pipeline.common.__description__ = 'Common data models for Science Live pipeline'#