"""
LLM增强版三维模型语义搜索引擎
集成LLM进行：
1. 零件语义分析增强
2. 用户查询意图理解
3. 智能查询扩展
4. 自然语言交互
"""

import json
import os
from typing import List, Dict, Any, Optional, Tuple
from dataclasses import dataclass, field
from pathlib import Path

# 导入基础模块
from feature_semantic_analyzer import FeatureSemanticAnalyzer, FeatureSemantics
from search_engine import CADModelSearchEngine, SearchResult, SimpleTextVectorizer
from llm_enhanced import LLMEnhancedAnalyzer, LLMConfig, create_llm_provider


@dataclass
class EnhancedSearchResult(SearchResult):
    """Search result extended with LLM-derived extras.

    Adds three defaulted fields on top of whatever ``SearchResult`` declares.
    """
    # LLM-generated explanation for the match (empty when none was produced).
    llm_explanation: str = ""
    # Recommended related parts.
    related_parts: List[str] = field(default_factory=list)
    # Specification parameters.
    specifications: Dict[str, Any] = field(default_factory=dict)


class LLMEnhancedSearchEngine(CADModelSearchEngine):
    """LLM增强的搜索引擎"""
    
    def __init__(self, llm_config: LLMConfig = None):
        """Initialise the base engine and probe the LLM provider.

        Args:
            llm_config: LLM settings. When omitted (or falsy) a default
                ``LLMConfig()`` is created.
        """
        super().__init__()

        # Wire up the LLM analyzer and record whether its provider is usable.
        config = llm_config if llm_config else LLMConfig()
        self.llm_config = config
        self.llm_analyzer = LLMEnhancedAnalyzer(config)
        provider_ready = self.llm_analyzer.provider.is_available()
        self.llm_available = provider_ready

        # Tell the operator which mode the engine is running in.
        if not provider_ready:
            print("⚠ LLM不可用，使用规则引擎模式")
        else:
            print(f"✓ LLM增强已启用 (提供者: {self.llm_config.provider})")

        # Extra synonyms that the LLM may extend at runtime; starts empty.
        self.dynamic_synonyms = {}
    
    def index_model_with_llm(self, features_data: Dict,
                             rule_based_semantics: Optional[Dict] = None) -> Dict:
        """Index one model, enriching the rule-based semantics with LLM output.

        The rule-based record is shallow-copied, optionally enriched with the
        result of ``llm_analyzer.analyze_part`` (description, part type,
        keywords, function, similar parts, standards, material and process
        hints), and then handed to ``index_model``.

        Args:
            features_data: Raw feature dictionary of the model. The file id is
                derived from ``features_data["file_info"]["filename"]`` with
                ``".ccm"`` removed.
            rule_based_semantics: Optional semantics produced by the rule
                engine; it is not mutated.

        Returns:
            The semantic record that was passed to ``index_model``.
        """
        file_info = features_data.get("file_info") or {}
        file_id = (file_info.get("filename") or "").replace(".ccm", "")

        # Start from the rule-based record (copied so the caller's dict is untouched).
        semantic_data = rule_based_semantics.copy() if rule_based_semantics else {}
        # Do not clobber a usable rule-based id with an empty one when the
        # feature payload carries no filename.
        if file_id or not semantic_data.get("file_id"):
            semantic_data["file_id"] = file_id

        llm_result = None
        if self.llm_available:
            llm_result = self.llm_analyzer.analyze_part(features_data)

        if llm_result:
            as_list = self._coerce_text_list

            # LLM description replaces the rule-based one and is also kept separately.
            description = llm_result.get("description")
            if description:
                semantic_data["description"] = description
                semantic_data["llm_description"] = description

            # Part type / category are only overridden when the LLM names a type.
            part_type = llm_result.get("part_type")
            if part_type:
                semantic_data["part_type"] = part_type
                semantic_data["part_category"] = llm_result.get("part_category") or ""

            # Merge keywords in a stable, first-seen order. A set would make the
            # keyword order (and the derived searchable text) vary between runs
            # because of string-hash randomisation.
            similar_parts = as_list(llm_result.get("similar_parts"))
            merged_keywords = (
                as_list(semantic_data.get("keywords"))
                + as_list(llm_result.get("keywords"))
                + as_list(llm_result.get("industry_terms"))
                + similar_parts
            )
            semantic_data["keywords"] = list(dict.fromkeys(merged_keywords))

            # Extra LLM-provided attributes; null values are normalised.
            semantic_data["function"] = llm_result.get("function") or ""
            semantic_data["similar_parts"] = similar_parts
            semantic_data["standards"] = as_list(llm_result.get("standards"))
            semantic_data["material_suggestion"] = llm_result.get("material_suggestion") or ""
            semantic_data["manufacturing_process"] = llm_result.get("manufacturing_process") or ""

            # Rebuild the searchable text from the enriched record.
            # "industry_terms" is only present here if the rule-based record
            # supplied it; the LLM's terms were merged into the keywords above.
            searchable_parts = [
                semantic_data.get("filename", ""),
                semantic_data.get("part_type", ""),
                semantic_data.get("part_category", ""),
                semantic_data.get("description", ""),
                semantic_data.get("function", ""),
                " ".join(semantic_data["keywords"]),
                " ".join(as_list(semantic_data.get("industry_terms"))),
            ]
            semantic_data["searchable_text"] = " ".join(
                str(part) for part in searchable_parts if part
            )

        # Delegate the actual indexing to the parent implementation.
        self.index_model(semantic_data)

        return semantic_data

    @staticmethod
    def _coerce_text_list(value: Any) -> List[str]:
        """Turn a loosely-typed field into a list of non-blank strings.

        LLM JSON is not schema-checked here, so a list field may arrive as
        ``None`` or as a single bare string. ``None`` and unsupported types
        yield ``[]``, a bare string becomes a one-element list, and blank or
        ``None`` items are dropped. Non-string items are stringified.
        """
        if value is None:
            return []
        if isinstance(value, str):
            value = [value]
        elif not isinstance(value, (list, tuple, set, frozenset)):
            return []

        texts = []
        for item in value:
            if item is None:
                continue
            text = item if isinstance(item, str) else str(item)
            if text.strip():
                texts.append(text)
        return texts
    
    def smart_search(self, query: str, top_k: int = 10,
                     explain: bool = True) -> List[EnhancedSearchResult]:
        """Run an LLM-assisted multi-query search and return ranked results.

        The original query is searched with weight 1.0 and every LLM-suggested
        expansion (expanded queries, synonyms, part types) with weight 0.5.
        Per-model scores are summed, scaled so the best hit scores 1.0, and
        the ``top_k`` best models are returned. Without an available LLM only
        the original query is searched.

        Args:
            query: The user's search text.
            top_k: Maximum number of results; each sub-query asks
                ``hybrid_search`` for ``top_k * 2`` candidates.
            explain: When true and the LLM is available, the first three
                results get an LLM-generated match explanation.

        Returns:
            ``EnhancedSearchResult`` objects ordered by descending score.
        """
        # 1. Let the LLM interpret the query and propose expansions.
        query_understanding = {}
        expanded_queries = [query]

        if self.llm_available:
            understood = self.llm_analyzer.understand_query(query)
            if isinstance(understood, dict) and understood:
                query_understanding = understood
                candidates = [query]
                for key in ("expanded_queries", "synonyms", "part_types"):
                    suggested = understood.get(key) or []  # tolerate JSON null
                    if isinstance(suggested, str):
                        suggested = [suggested]
                    if not isinstance(suggested, (list, tuple)):
                        continue
                    # Blank terms would match every keyword below; drop them.
                    candidates.extend(
                        term for term in suggested
                        if isinstance(term, str) and term.strip()
                    )
                # Order-preserving de-duplication: the original query stays
                # first, so the 10-item cap can never discard it (a set has
                # arbitrary order and could).
                expanded_queries = list(dict.fromkeys(candidates))[:10]

        # 2. Search every query variant and accumulate weighted scores.
        all_scores: Dict[str, float] = {}
        for q in expanded_queries:
            weight = 1.0 if q == query else 0.5  # the user's own wording counts double
            for hit in self.hybrid_search(q, top_k * 2):
                all_scores[hit.file_id] = all_scores.get(hit.file_id, 0) + hit.score * weight

        # 3. Normalise to [0, 1] and rank. Skip scaling when the best score is
        #    not positive, which would otherwise divide by zero.
        max_score = max(all_scores.values(), default=0)
        if max_score > 0:
            for fid in all_scores:
                all_scores[fid] /= max_score

        sorted_ids = sorted(all_scores, key=all_scores.get, reverse=True)[:top_k]

        # 4. Build the enhanced result objects.
        needles = [q.lower() for q in expanded_queries if isinstance(q, str) and q]
        match_type = "llm_enhanced" if self.llm_available else "hybrid"

        enhanced_results = []
        for file_id in sorted_ids:
            data = self.models.get(file_id, {})

            # Highlight up to five keywords that contain, or are contained in,
            # any of the query variants (case-insensitive).
            highlights = []
            for kw in data.get("keywords") or []:
                if not isinstance(kw, str) or not kw:
                    continue
                kw_lower = kw.lower()
                if any(n in kw_lower or kw_lower in n for n in needles):
                    highlights.append(f"关键词: {kw}")
                    if len(highlights) == 5:
                        break

            # Explanations cost one LLM call each, so only the top three get one.
            llm_explanation = ""
            if explain and self.llm_available and len(enhanced_results) < 3:
                llm_explanation = self._generate_match_explanation(
                    query, query_understanding, data
                )

            related = data.get("similar_parts") or []
            if isinstance(related, str):
                related = [related]

            enhanced_results.append(EnhancedSearchResult(
                file_id=file_id,
                filename=data.get("filename", ""),
                part_type=data.get("part_type", ""),
                score=all_scores[file_id],
                match_type=match_type,
                highlights=highlights,
                description=(data.get("description") or "")[:300],
                llm_explanation=llm_explanation,
                related_parts=related[:3],
                specifications={},
            ))

        return enhanced_results
    
    def _generate_match_explanation(self, query: str,
                                   query_understanding: Dict,
                                   model_data: Dict) -> str:
        """Ask the LLM for a short reason why a part matches the query.

        Args:
            query: The user's search text.
            query_understanding: Accepted for interface compatibility; it is
                not used when building the prompt.
            model_data: Indexed record of the matched part; its ``filename``,
                ``part_type`` and the first 200 characters of ``description``
                go into the prompt.

        Returns:
            The stripped reply capped at 100 characters, or ``""`` when the
            LLM is unavailable or returns nothing.
        """
        if self.llm_available:
            part_name = model_data.get('filename', '')
            part_kind = model_data.get('part_type', '')
            summary = model_data.get('description', '')[:200]

            prompt = f"""用户搜索："{query}"
找到零件：{part_name}
零件类型：{part_kind}
零件描述：{summary}

请用一句话（30字以内）解释为什么这个零件匹配用户的搜索。"""

            reply = self.llm_analyzer.provider.generate(prompt)
            if reply:
                return reply.strip()[:100]

        return ""
    
    def conversational_search(self, user_message: str,
                              history: Optional[List[Dict]] = None) -> Dict:
        """Answer a chat message, running a part search when appropriate.

        Without an available LLM the message is passed straight to
        ``search``. Otherwise the LLM classifies the message; if it reports a
        search intent with a query, ``smart_search`` is run on that query. If
        the LLM reply cannot be parsed into a non-empty dict, the method
        degrades to searching the raw message, mirroring the non-LLM path.

        Args:
            user_message: The latest user message.
            history: Optional earlier messages as dicts with ``role`` and
                ``content``; only the last five entries are sent to the LLM.

        Returns:
            A dict with ``message`` (reply text), ``results`` (search hits,
            possibly empty) and ``suggestions`` (follow-up questions).
        """
        if not self.llm_available:
            # Degrade to a plain search.
            results = self.search(user_message, top_k=5)
            return {
                "message": f"找到 {len(results)} 个相关零件",
                "results": results,
                "suggestions": []
            }

        # Render the last five history entries, one "role: content" line each.
        history_text = "".join(
            f"{'用户' if entry.get('role') == 'user' else '助手'}: {entry.get('content', '')}\n"
            for entry in (history or [])[-5:]
        )
        history_section = f"对话历史：{history_text}" if history_text else ""

        prompt = f"""你是一个专业的机械零件搜索助手。

{history_section}

用户消息：{user_message}

请分析用户需求，判断：
1. 用户是否在搜索零件？如果是，提取搜索关键词
2. 用户是否在询问零件信息？
3. 用户是否需要帮助？

输出JSON：
```json
{{
    "intent": "search/info/help/other",
    "search_query": "提取的搜索词（如果是搜索）",
    "response": "给用户的回复",
    "follow_up_questions": ["建议的后续问题"]
}}
```"""

        response = self.llm_analyzer.provider.generate(prompt)
        parsed = self.llm_analyzer._parse_json_response(response)
        # The sibling methods treat a falsy parse result as "no usable answer";
        # do the same here instead of calling .get() on None.
        if not isinstance(parsed, dict):
            parsed = {}

        result = {
            # `or` also covers keys that are present but null/empty.
            "message": parsed.get("response") or "我来帮您搜索相关零件",
            "results": [],
            "suggestions": parsed.get("follow_up_questions") or []
        }

        if parsed:
            # Search only when the LLM reports a search intent with a query.
            search_query = parsed.get("search_query") if parsed.get("intent") == "search" else None
        else:
            # Unusable LLM reply: the default message promises a search, so
            # search the raw message rather than returning nothing.
            search_query = user_message

        if isinstance(search_query, str) and search_query.strip():
            search_results = self.smart_search(search_query, top_k=5)
            result["results"] = search_results

            # Summarise the hits, listing at most the first three.
            if search_results:
                summary = [f"为您找到 {len(search_results)} 个相关零件：\n"]
                summary.extend(
                    f"{i}. {r.filename} ({r.part_type})\n"
                    for i, r in enumerate(search_results[:3], 1)
                )
                result["message"] = "".join(summary)

        return result
    
    def get_part_details(self, file_id: str) -> Dict:
        """Return a part's indexed record, enriched by the LLM when possible.

        On the first call for a part (with an available LLM) a technical
        write-up is requested and the parsed fields are stored on the indexed
        record together with ``llm_enriched = True``. Later calls reuse the
        stored fields instead of calling the LLM again.

        Args:
            file_id: Key of the part in ``self.models``.

        Returns:
            A shallow copy of the (possibly enriched) record, or ``{}`` when
            ``file_id`` is unknown.
        """
        if file_id not in self.models:
            return {}

        record = self.models[file_id]

        # Only call the LLM for records that have not been enriched yet.
        if self.llm_available and not record.get("llm_enriched"):
            keyword_line = ', '.join(str(kw) for kw in (record.get('keywords') or [])[:10])

            # The reply is read by key below, so the prompt spells out the
            # exact JSON keys expected.
            prompt = f"""为以下机械零件生成详细的技术说明：

零件名称：{record.get('filename', '')}
零件类型：{record.get('part_type', '')}
现有描述：{record.get('description', '')}
关键词：{keyword_line}

请提供：
1. 详细技术描述（100字）
2. 典型应用场景（3个）
3. 选型建议
4. 注意事项

输出JSON格式：
```json
{{
    "technical_description": "详细技术描述",
    "applications": ["应用场景1", "应用场景2", "应用场景3"],
    "selection_guide": "选型建议",
    "notes": "注意事项"
}}
```"""

            response = self.llm_analyzer.provider.generate(prompt)
            enriched = self.llm_analyzer._parse_json_response(response)

            if enriched and isinstance(enriched, dict):
                # Write to the stored record, not a throwaway copy; otherwise
                # the llm_enriched guard above could never take effect.
                record["technical_description"] = enriched.get("technical_description") or ""
                record["applications"] = enriched.get("applications") or []
                record["selection_guide"] = enriched.get("selection_guide") or ""
                record["notes"] = enriched.get("notes") or ""
                record["llm_enriched"] = True

        # Hand out a copy so callers cannot mutate the index by accident.
        return record.copy()


def create_enhanced_search_engine(json_files: List[str],
                                  llm_config: LLMConfig = None) -> LLMEnhancedSearchEngine:
    """Build an LLM-enhanced search engine from feature JSON files.

    Each file is loaded, analysed by the rule engine, indexed through
    ``index_model_with_llm`` and reported on stdout. A file that raises is
    reported and skipped. The vector index is built once at the end.

    Args:
        json_files: Paths of the feature JSON files to index.
        llm_config: Optional LLM settings forwarded to the engine.

    Returns:
        The populated ``LLMEnhancedSearchEngine``.
    """
    analyzer = FeatureSemanticAnalyzer()
    engine = LLMEnhancedSearchEngine(llm_config)

    rule = "=" * 60
    print("\n" + rule)
    print("构建LLM增强搜索索引")
    print(rule)

    for path in json_files:
        try:
            # Raw feature payload, used for the LLM analysis.
            with open(path, 'r', encoding='utf-8') as handle:
                raw_features = json.load(handle)

            # Rule-engine semantics for the same file.
            parsed = analyzer.analyze_json(path)
            rule_semantics = analyzer.to_dict(parsed)

            # Index with LLM enrichment on top of the rule-based record.
            indexed = engine.index_model_with_llm(raw_features, rule_semantics)

            print(f"✓ {parsed.filename}")
            print(f"  类型: {indexed.get('part_type', 'N/A')}")
            purpose = indexed.get('function')
            if purpose:
                print(f"  功能: {purpose[:50]}...")

        except Exception as exc:
            # Best-effort batch: report the failure and continue with the next file.
            print(f"✗ 索引失败: {path} - {exc}")

    # Build the vector index over everything indexed above.
    engine.build_vector_index()

    print(f"\n索引完成，共 {len(engine.models)} 个模型")

    return engine


# ============== Manual smoke test ==============

if __name__ == "__main__":
    # LLM configuration; the API key is read from the environment.
    llm_config = LLMConfig(
        provider="anthropic",
        model="claude-sonnet-4-20250514",
        api_key=os.getenv("ANTHROPIC_API_KEY", ""),
    )

    # Sample feature files to index.
    test_files = [
        "/mnt/user-data/uploads/AGES-50_features.json",
        "/mnt/user-data/uploads/prt0001_features.json",
        "/mnt/user-data/uploads/凹端内六角紧定螺钉_PSEP-6-16-A__features.json",
        "/mnt/user-data/uploads/内六角喉塞_PSEG-M22-A__features.json",
        "/mnt/user-data/uploads/内六角机螺钉_内六角平头带垫螺栓_PSHFP-M6-25-A__features.json",
    ]

    # Build the engine over the sample files.
    engine = create_enhanced_search_engine(test_files, llm_config)

    # Exercise smart_search with a few sample queries.
    banner = "=" * 60
    print("\n" + banner)
    print("智能搜索测试")
    print(banner)

    test_queries = ["三通管件", "M6螺丝", "堵头", "紧固件"]

    for query in test_queries:
        print(f"\n🔍 搜索: '{query}'")
        print("-" * 40)

        hits = engine.smart_search(query, top_k=3, explain=True)

        for rank, hit in enumerate(hits, 1):
            print(f"  {rank}. {hit.filename}")
            print(f"     类型: {hit.part_type} | 分数: {hit.score:.3f}")
            if hit.llm_explanation:
                print(f"     解释: {hit.llm_explanation}")
            if hit.related_parts:
                print(f"     相关: {', '.join(hit.related_parts)}")
