# Source code for science_live.setup.wordnet
# =============================================================================
# science_live/setup/wordnet.py - for wn 0.12.0 API
# =============================================================================
"""
WordNet setup module - accessible via console scripts.
Compatible with wn 0.12.0 API changes.
"""
import sys
import argparse
import logging
from pathlib import Path
# Configure logging at import time: INFO threshold, "LEVEL: message" line format.
logging.basicConfig(
    format='%(levelname)s: %(message)s',
    level=logging.INFO,
)
# Module-scoped logger shared by the setup helpers defined in this file.
logger = logging.getLogger(__name__)
[docs]
def check_wn_availability():
    """Report whether the optional ``wn`` package can be imported.

    The outcome is logged: an info line on success, two error lines
    (including the install hint) on failure.

    Returns:
        bool: ``True`` when ``import wn`` succeeds, ``False`` when it raises
        ``ImportError``.
    """
    try:
        import wn  # noqa: F401 - imported only to probe availability
    except ImportError:
        logger.error("✗ WordNet package (wn) not found")
        logger.error("Install with: pip install science-live[enhanced]")
        return False
    else:
        logger.info("✓ WordNet package (wn) is available")
        return True
[docs]
def get_lexicon_info(lexicon):
    """Return an ``(identifier, label)`` pair describing *lexicon*.

    The identifier is taken from, in order of preference:

    1. ``lexicon.specifier`` - called when it is callable, otherwise used
       as a plain value;
    2. ``"{lexicon.id}:{lexicon.version}"`` when both attributes exist;
    3. ``str(lexicon)``.

    The label is ``lexicon.label``, else ``lexicon.name``, else ``"Unknown"``.

    The identifier and the label are resolved independently, so a failure
    while reading one of them does not discard the other.

    Args:
        lexicon: A lexicon-like object (or any object; unknown shapes fall
            back to ``str(lexicon)`` and ``"Unknown"``).

    Returns:
        tuple: ``(identifier, label)``.
    """
    # --- identifier -------------------------------------------------------
    try:
        specifier = getattr(lexicon, 'specifier', None)
        if specifier is not None:
            # Method on some objects; tolerate a plain attribute as well
            # instead of failing with "object is not callable".
            identifier = specifier() if callable(specifier) else str(specifier)
        elif hasattr(lexicon, 'id') and hasattr(lexicon, 'version'):
            identifier = f"{lexicon.id}:{lexicon.version}"
        else:
            identifier = str(lexicon)
    except Exception as e:
        logger.debug(f"Error getting lexicon info: {e}")
        identifier = str(lexicon)

    # --- label ------------------------------------------------------------
    try:
        if hasattr(lexicon, 'label'):
            label = lexicon.label
        elif hasattr(lexicon, 'name'):
            label = lexicon.name
        else:
            label = "Unknown"
    except Exception as e:
        logger.debug(f"Error getting lexicon info: {e}")
        label = "Unknown"

    return identifier, label
[docs]
def download_wordnet_data():
    """Ensure English WordNet data is present, downloading ``oewn:2024`` if not.

    Installed lexicons are inspected first. If any identifier contains
    ``oewn`` - or, failing that, ``ewn`` - nothing is downloaded. An error
    raised while inspecting the installed lexicons is logged as a warning
    and the download is attempted anyway.

    Returns:
        bool: ``True`` if matching data was already installed or the
        download call completed; ``False`` if importing ``wn`` or the
        download raised.
    """
    try:
        import wn

        logger.info("Checking for existing WordNet data...")

        try:
            # Lower-cased identifiers of every installed lexicon.
            specs = [
                get_lexicon_info(entry)[0].lower()
                for entry in wn.lexicons()
            ]

            if any('oewn' in spec for spec in specs):
                logger.info("✓ Open English WordNet data already installed")
                return True

            # No 'oewn' match at this point, so any 'ewn' hit is an older release.
            if any('ewn' in spec for spec in specs):
                logger.info("✓ English WordNet data found (older version)")
                logger.info("Consider upgrading to Open English WordNet 2024")
                return True
        except Exception as check_error:
            logger.warning(f"Could not check existing lexicons: {check_error}")

        logger.info("Downloading Open English WordNet 2024...")
        logger.info("This may take a few minutes...")

        wn.download('oewn:2024')
    except Exception as download_error:
        logger.error(f"✗ Failed to download WordNet data: {download_error}")
        logger.error("Check your internet connection and try again")
        return False

    logger.info("✓ Open English WordNet 2024 downloaded successfully")
    return True
[docs]
def verify_installation():
    """Verify that WordNet can answer a lookup for the word ``'science'``.

    Three strategies are tried in order, each one only if the previous one
    raised:

    1. ``wn.Wordnet('oewn:2024').synsets('science')``
    2. ``wn.synsets('science')``
    3. the first lexicon returned by ``wn.lexicons()`` (empty result if
       there are none).

    Only ``Exception`` subclasses trigger a fallback, so ``KeyboardInterrupt``
    and ``SystemExit`` propagate instead of being silently swallowed. The
    reason for each fallback is logged at DEBUG level.

    Returns:
        bool: ``True`` if at least one synset was found; ``False`` if the
        lookup came back empty or verification raised.
    """
    try:
        import wn

        logger.info("Verifying WordNet installation...")

        try:
            # Preferred: the Open English WordNet 2024 lexicon.
            en = wn.Wordnet('oewn:2024')
            synsets = en.synsets('science')
        except Exception as oewn_error:
            logger.debug("oewn:2024 lookup failed: %s", oewn_error)
            try:
                # Fallback: module-level query.
                synsets = wn.synsets('science')
            except Exception as generic_error:
                logger.debug("Generic synset lookup failed: %s", generic_error)
                # Last resort: whichever lexicon is listed first.
                lexicons = wn.lexicons()
                if lexicons:
                    identifier, _ = get_lexicon_info(lexicons[0])
                    en = wn.Wordnet(identifier)
                    synsets = en.synsets('science')
                else:
                    synsets = []

        if synsets:
            logger.info(f"✓ WordNet working - found {len(synsets)} synsets for 'science'")
            return True

        logger.warning("⚠ WordNet installed but not functioning properly")
        return False
    except Exception as e:
        logger.error(f"✗ WordNet verification failed: {e}")
        return False
[docs]
def status():
    """Print a WordNet status report to stdout.

    The report lists the installed lexicons and then runs a small lookup
    test for a few sample words. Each word is first looked up through a
    ``wn.Wordnet`` built from the first listed lexicon and, if that yields
    nothing, through the module-level ``wn.synsets``.

    Lookup failures are treated as "no synsets" but only for ``Exception``
    subclasses, so ``KeyboardInterrupt``/``SystemExit`` are not swallowed.

    Returns:
        bool: ``False`` when ``wn`` cannot be imported, when listing the
        lexicons fails, when no lexicon is installed, or on an unexpected
        error; ``True`` once the report has been printed.
    """
    try:
        import wn

        print("WordNet Status Report")
        print("=" * 25)

        # List installed lexicons; an empty list means nothing is installed.
        try:
            lexicons = wn.lexicons()
            print(f"Available lexicons: {len(lexicons)}")
            if not lexicons:
                print(" No lexicons found")
                print(" Run: setup-wordnet install")
                return False

            for lexicon in lexicons:
                try:
                    identifier, label = get_lexicon_info(lexicon)
                    print(f" - {identifier}: {label}")
                except Exception as e:
                    print(f" - Error reading lexicon: {e}")
        except Exception as e:
            print(f"Error listing lexicons: {e}")
            return False

        print("\nFunctionality test:")

        # Build the lexicon-specific Wordnet once; it does not depend on the
        # word being tested. None means "skip straight to the generic lookup".
        try:
            identifier, _ = get_lexicon_info(lexicons[0])
            primary = wn.Wordnet(identifier)
        except Exception:
            primary = None

        for word in ('science', 'research', 'publication'):
            try:
                synsets = None

                # Method 1: lexicon-specific lookup.
                if primary is not None:
                    try:
                        synsets = primary.synsets(word)
                    except Exception:
                        synsets = None

                # Method 2: module-level lookup.
                if not synsets:
                    try:
                        synsets = wn.synsets(word)
                    except Exception:
                        synsets = None

                if synsets:
                    print(f" {word}: {len(synsets)} synsets ✓")
                else:
                    print(f" {word}: No synsets found ⚠")
            except Exception as e:
                print(f" {word}: Error - {e}")

        print("\n✓ WordNet status check complete")
        return True
    except ImportError:
        print("✗ WordNet package not available")
        print("Install with: pip install wn")
        return False
    except Exception as e:
        print(f"✗ WordNet status check failed: {e}")
        return False
[docs]
def main(argv=None):
    """Entry point for the WordNet setup command-line tool.

    Accepts one optional positional command:

    * ``install`` (default) - check that ``wn`` is importable, download the
      data, then verify the installation;
    * ``status`` - print the status report;
    * ``verify`` - run the verification lookup only.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None`` (the default) ``argparse`` reads
            ``sys.argv[1:]``, which is the behaviour of calling ``main()``
            with no arguments.

    Returns:
        bool: ``True`` when the selected command succeeded, else ``False``.

    Raises:
        SystemExit: Raised by ``argparse`` for invalid arguments or ``--help``.
    """
    parser = argparse.ArgumentParser(description="Setup WordNet for Science Live")
    parser.add_argument(
        'command',
        nargs='?',
        choices=['install', 'status', 'verify'],
        default='install',
        help='WordNet command (default: install)',
    )
    args = parser.parse_args(argv)

    print("🧠 Science Live WordNet Setup")
    print("=" * 30)

    if args.command == 'status':
        return status()
    if args.command == 'verify':
        return verify_installation()

    # 'install': nothing else can work without the wn package.
    if not check_wn_availability():
        return False

    # Verification runs even if the download failed.
    downloaded = download_wordnet_data()
    verified = verify_installation()
    success = bool(downloaded and verified)

    if success:
        print("\n✅ WordNet setup completed successfully!")
        print("\nNext steps:")
        print(" - Test: setup-wordnet status")
        print(" - Validate: validate-science-live --wordnet")
        print(" - Use enhanced features in your Science Live pipeline")
    else:
        print("\n❌ WordNet setup encountered issues")
        print("Try running individual commands:")
        print(" setup-wordnet status")
        print(" setup-wordnet verify")

    return success
if __name__ == "__main__":
    # Map main()'s truthy/falsy result onto the process exit status (0 = success).
    exit_status = 0 if main() else 1
    sys.exit(exit_status)