science_live.pipeline.common
#
Science Live Pipeline: Common Data Models#
Shared data structures used throughout the steps of the Science Live processing pipeline. All pipeline steps import from this module to ensure consistent data flow.
Author: Science Live Team
Version: 0.0.1
Module Contents#
Classes#
Types of entities that can be extracted |
|
Types of questions |
|
Confidence levels for processing results |
|
Context passed through the entire pipeline |
|
Output of question processing step |
|
An entity extracted from the question |
|
Output of entity extraction and linking |
|
A structured Rosetta statement |
|
Output of Rosetta statement generation |
|
Generated SPARQL query |
|
Output of SPARQL generation |
|
Raw results from SPARQL execution |
|
A single structured result |
|
Output of result processing |
|
Final natural language output |
|
Abstract base class for all pipeline steps |
Functions#
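get_confidence_level | Convert numeric confidence to confidence level enum |
|
merge_metadata | Merge multiple metadata dictionaries |
|
validate_processing_context | Validate processing context |
|
validate_extracted_entity | Validate extracted entity |
|
validate_rosetta_statement | Validate Rosetta statement |
|
validate_sparql_query | Validate SPARQL query |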
Data#
API#
- science_live.pipeline.common.__all__ = ['ProcessingContext', 'ProcessedQuestion', 'ExtractedEntity', 'LinkedEntities', 'RosettaStatement', ...#
- class science_live.pipeline.common.EntityType(*args, **kwds)[source]#
Bases:
enum.Enum
Types of entities that can be extracted
Initialization
- DOI = 'doi'#
- ORCID = 'orcid'#
- URL = 'url'#
- PERSON = 'person'#
- CONCEPT = 'concept'#
- TITLE = 'title'#
- ORGANIZATION = 'organization'#
- LOCATION = 'location'#
- DATE = 'date'#
- NUMBER = 'number'#
- UNKNOWN = 'unknown'#
- class science_live.pipeline.common.QuestionType(*args, **kwds)[source]#
Bases:
enum.Enum
Types of questions
Initialization
- WHAT = 'what'#
- WHO = 'who'#
- WHERE = 'where'#
- WHEN = 'when'#
- HOW = 'how'#
- WHY = 'why'#
- LIST = 'list'#
- COUNT = 'count'#
- GENERAL = 'general'#
- class science_live.pipeline.common.ConfidenceLevel(*args, **kwds)[source]#
Bases:
enum.Enum
Confidence levels for processing results
Initialization
- HIGH = 'high'#
- MEDIUM = 'medium'#
- LOW = 'low'#
- class science_live.pipeline.common.ProcessingContext[source]#
Context passed through the entire pipeline
- class science_live.pipeline.common.ProcessedQuestion[source]#
Output of question processing step
- question_type: science_live.pipeline.common.QuestionType = None#
- class science_live.pipeline.common.ExtractedEntity[source]#
An entity extracted from the question
- entity_type: science_live.pipeline.common.EntityType = None#
- class science_live.pipeline.common.LinkedEntities[source]#
Output of entity extraction and linking
- entities: List[science_live.pipeline.common.ExtractedEntity] = None#
- subject_candidates: List[science_live.pipeline.common.ExtractedEntity] = None#
- object_candidates: List[science_live.pipeline.common.ExtractedEntity] = None#
- class science_live.pipeline.common.RosettaStatement[source]#
A structured Rosetta statement
- subject: science_live.pipeline.common.ExtractedEntity = None#
- required_object1: Optional[science_live.pipeline.common.ExtractedEntity] = None#
- optional_object1: Optional[science_live.pipeline.common.ExtractedEntity] = None#
- optional_object2: Optional[science_live.pipeline.common.ExtractedEntity] = None#
- optional_object3: Optional[science_live.pipeline.common.ExtractedEntity] = None#
- class science_live.pipeline.common.GeneratedStatements[source]#
Output of Rosetta statement generation
- statements: List[science_live.pipeline.common.RosettaStatement] = None#
- alternative_interpretations: List[science_live.pipeline.common.RosettaStatement] = 'field(...)'#
- class science_live.pipeline.common.GeneratedQueries[source]#
Output of SPARQL generation
- primary_query: science_live.pipeline.common.SPARQLQuery = None#
- fallback_queries: List[science_live.pipeline.common.SPARQLQuery] = 'field(...)'#
- class science_live.pipeline.common.StructuredResult[source]#
A single structured result
- rosetta_statement: Optional[science_live.pipeline.common.RosettaStatement] = None#
- class science_live.pipeline.common.ProcessedResults[source]#
Output of result processing
- results: List[science_live.pipeline.common.StructuredResult] = None#
- groupings: Dict[str, List[science_live.pipeline.common.StructuredResult]] = 'field(...)'#
- science_live.pipeline.common.get_confidence_level(confidence: float) -> science_live.pipeline.common.ConfidenceLevel [source]#
Convert numeric confidence to confidence level enum
- science_live.pipeline.common.merge_metadata(*metadata_dicts: Dict[str, Any]) -> Dict[str, Any] [source]#
Merge multiple metadata dictionaries
- class science_live.pipeline.common.PipelineStep(config: Dict[str, Any] = None)[source]#
Bases:
abc.ABC
Abstract base class for all pipeline steps
Initialization
- abstractmethod async process(input_data: Any, context: science_live.pipeline.common.ProcessingContext) -> Any [source]#
Process input data and return output for next step
- science_live.pipeline.common.validate_processing_context(context: science_live.pipeline.common.ProcessingContext) -> bool [source]#
Validate processing context
- science_live.pipeline.common.validate_extracted_entity(entity: science_live.pipeline.common.ExtractedEntity) -> bool [source]#
Validate extracted entity
- science_live.pipeline.common.validate_rosetta_statement(statement: science_live.pipeline.common.RosettaStatement) -> bool [source]#
Validate Rosetta statement
- science_live.pipeline.common.validate_sparql_query(query: science_live.pipeline.common.SPARQLQuery) -> bool [source]#
Validate SPARQL query
- science_live.pipeline.common.__version__ = '1.0.0'#
- science_live.pipeline.common.__author__ = 'Science Live Team'#
- science_live.pipeline.common.__description__ = 'Common data models for Science Live pipeline'#