import re
import sys
import urllib.parse
import threading
import time
import requests
from pyquery import PyQuery as pq

sys.path.append('..')
from base.spider import Spider

class Spider(Spider):
    """TVBox-style spider plugin for the 好色TV site (m.ml0987.online)."""

    def __init__(self):
        # Basic configuration
        self.name = '好色TV（优）'
        self.host = 'https://m.ml0987.online/'
        # Mirror domains probed by get_fastest_host() during init()
        self.candidate_hosts = [
            "https://m.ml0987.online/"
        ]
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Referer': self.host
        }
        # NOTE(review): value looks like milliseconds, but requests timeouts
        # are in seconds; this attribute is never read anywhere in this file.
        self.timeout = 5000
        
        # Category display name -> routing info (type_id and URL path suffix)
        self.class_map = {
            '视频': {'type_id': 'list', 'url_suffix': ''},
            '周榜': {'type_id': 'top7', 'url_suffix': 'top7'},
            '月榜': {'type_id': 'top', 'url_suffix': 'top'},
            '5分钟+': {'type_id': '5min', 'url_suffix': '5min'},
            '10分钟+': {'type_id': 'long', 'url_suffix': 'long'}
        }

    def getName(self):
        """Return the human-readable name of this source."""
        source_name = self.name
        return source_name

    def init(self, extend=""):
        # 尝试获取最快可用域名
        self.host = self.get_fastest_host()
        self.headers['Referer'] = self.host

    def isVideoFormat(self, url):
        """Heuristic check: does the URL look like a direct video stream?"""
        if not url:
            return False
        lowered = url.lower()
        for ext in ('.mp4', '.m3u8', '.flv', '.avi'):
            if ext in lowered:
                return True
        return False

    def manualVideoCheck(self):
        """Return a callable that verifies a URL actually serves video content."""
        def _probe(url):
            # Cheap extension check first; only then issue a HEAD request.
            if not self.isVideoFormat(url):
                return False
            try:
                head = self.fetch(url, headers=self.headers, method='HEAD', timeout=3)
                ok_status = head.status_code in (200, 302)
                is_video = 'video' in head.headers.get('Content-Type', '')
                return ok_status and is_video
            except:
                return False
        return _probe

    def get_fastest_host(self):
        """Probe all candidate domains concurrently and return the fastest one.

        Each candidate is HEAD-requested in its own thread; hosts that fail or
        answer with an unexpected status are recorded with infinite latency.
        Falls back to the first candidate when none respond.
        """
        results = {}
        threads = []

        def test_host(url):
            # Record round-trip latency in milliseconds, or inf on failure.
            try:
                start_time = time.time()
                resp = requests.head(url, headers=self.headers, timeout=2, allow_redirects=False)
                if resp.status_code in (200, 301, 302):
                    results[url] = (time.time() - start_time) * 1000
                else:
                    results[url] = float('inf')
            except:
                results[url] = float('inf')

        for host in self.candidate_hosts:
            t = threading.Thread(target=test_host, args=(host,))
            threads.append(t)
            t.start()
        for t in threads:
            t.join()

        valid_hosts = [(h, d) for h, d in results.items() if d != float('inf')]
        # Bug fix: the old code returned valid_hosts[0][0] — the first
        # reachable host in dict order — ignoring the measured delays.
        # Actually pick the minimum-latency host, as documented.
        if valid_hosts:
            return min(valid_hosts, key=lambda hd: hd[1])[0]
        return self.candidate_hosts[0]

    def homeContent(self, filter):
        """Build the home-page payload: category list plus latest video cards."""
        result = {}
        # Build the category list from the static class map
        classes = []
        for name, info in self.class_map.items():
            classes.append({
                'type_name': name,
                'type_id': info['type_id']
            })
        result['class'] = classes
        
        try:
            # Fetch the landing page (with retries)
            html = self.fetch_with_retry(self.host, retry=2, timeout=5).text
            data = pq(html)
            
            # Extract the video cards
            vlist = []
            items = data('.row .col-xs-6.col-md-3')
            for item in items.items():
                try:
                    title = item('h5').text().strip()
                    if not title:
                        continue
                    
                    # Cover image URL lives in an inline background-image style
                    style = item('.image').attr('style') or ''
                    pic_match = re.search(r'url\(["\']?([^"\']+)["\']?\)', style)
                    vod_pic = pic_match.group(1) if pic_match else ''
                    if vod_pic and not vod_pic.startswith('http'):
                        vod_pic = f"{self.host.rstrip('/')}/{vod_pic.lstrip('/')}"
                    
                    # Duration badge doubles as the remark text
                    desc = item('.duration').text().strip() or '未知'
                    
                    # Video id is the last path segment of the detail link
                    href = item('a').attr('href') or ''
                    if not href:
                        continue
                    vod_id = href.split('/')[-1]
                    if not vod_id.endswith('.htm'):
                        vod_id += '.htm'
                    
                    vlist.append({
                        'vod_id': vod_id,
                        'vod_name': title,
                        'vod_pic': vod_pic,
                        'vod_remarks': desc
                    })
                except Exception as e:
                    print(f"解析首页视频项失败: {e}")
                    continue
            
            result['list'] = vlist
        except Exception as e:
            print(f"首页解析失败: {e}")
            result['list'] = []
        return result

    def homeVideoContent(self):
        """No separate recommendation feed — homeContent already lists videos."""
        return list()

    def categoryContent(self, tid, pg, filter, extend):
        """List videos for category ``tid`` at page ``pg``."""
        result = {}
        try:
            # Resolve the category info matching this type_id
            cate_info = None
            for name, info in self.class_map.items():
                if info['type_id'] == tid:
                    cate_info = info
                    break
            
            if not cate_info:
                result['list'] = []
                return result
            
            # The main video category uses a different URL pattern than the
            # ranked/duration categories
            if tid == 'list':  # main category: list-1.htm, list-2.htm, ...
                url = f"{self.host}list-{pg}.htm"
            else:  # ranked categories (weekly/monthly/...): <suffix>_list-<pg>.htm
                url = f"{self.host}{cate_info['url_suffix']}_list-{pg}.htm"
            
            # Fetch the category page and normalize the text to UTF-8
            html = self.fetch(url, headers=self.headers, timeout=8).text
            html = html.encode('utf-8', errors='ignore').decode('utf-8')
            data = pq(html)
            
            # Extract the video cards (same markup as the home page)
            vlist = []
            items = data('.row .col-xs-6.col-md-3')
            for item in items.items():
                try:
                    title = item('h5').text().strip()
                    if not title:
                        continue
                    
                    # Cover image URL from the inline background-image style
                    style = item('.image').attr('style') or ''
                    pic_match = re.search(r'url\(["\']?([^"\']+)["\']?\)', style)
                    vod_pic = pic_match.group(1) if pic_match else ''
                    if vod_pic and not vod_pic.startswith('http'):
                        vod_pic = f"{self.host.rstrip('/')}/{vod_pic.lstrip('/')}"
                    
                    desc = item('.duration').text().strip() or '未知'
                    
                    href = item('a').attr('href') or ''
                    if not href:
                        continue
                    vod_id = href.split('/')[-1]
                    if not vod_id.endswith('.htm'):
                        vod_id += '.htm'
                    
                    vlist.append({
                        'vod_id': vod_id,
                        'vod_name': title,
                        'vod_pic': vod_pic,
                        'vod_remarks': desc
                    })
                except Exception as e:
                    print(f"解析分类视频项失败: {e}")
                    continue
            
            # Derive total page count from the numeric pagination links
            pagecount = 1
            try:
                pagination = data('.pagination1 li a')
                page_nums = []
                for a in pagination.items():
                    text = a.text().strip()
                    if text.isdigit():
                        page_nums.append(int(text))
                if page_nums:
                    pagecount = max(page_nums)
            except:
                pagecount = 1
            
            result['list'] = vlist
            result['page'] = pg
            result['pagecount'] = pagecount
            result['limit'] = len(vlist)
            result['total'] = 999999  # sentinel: real total is unknown
        except Exception as e:
            print(f"分类解析失败: {e}")
            result['list'] = []
            result['page'] = pg
            result['pagecount'] = 1
            result['limit'] = 0
            result['total'] = 0
        return result

    def detailContent(self, ids):
        """Fetch the detail page for ids[0] and build the vod record with play lines."""
        try:
            if not ids or not ids[0]:
                return {'list': []}
            
            vod_id = ids[0].strip()
            if not vod_id.endswith('.htm'):
                vod_id += '.htm'
            url = f"{self.host}{vod_id.lstrip('/')}"
            
            html = self.fetch_with_retry(url, retry=2, timeout=8).text
            html = html.encode('utf-8', errors='ignore').decode('utf-8')
            data = pq(html)
            
            # Extract the title
            title = data('.panel-title, .video-title, h1').text().strip() or '未知标题'
            
            # Extract the poster image (inline style first, then <img> fallbacks)
            vod_pic = ''
            poster_style = data('.vjs-poster').attr('style') or ''
            pic_match = re.search(r'url\(["\']?([^"\']+)["\']?\)', poster_style)
            if pic_match:
                vod_pic = pic_match.group(1)
            if not vod_pic:
                vod_pic = data('.video-pic img, .vjs-poster img, .thumbnail img').attr('src') or ''
            if vod_pic and not vod_pic.startswith('http'):
                vod_pic = f"{self.host}{vod_pic.lstrip('/')}"
            
            # Extract duration and view count from the info panel
            duration = '未知'
            views = '未知'
            info_items = data('.panel-body .col-md-3, .video-info .info-item, .info p')
            for item in info_items.items():
                text = item.text().strip()
                if '时长' in text or 'duration' in text.lower():
                    duration = text.replace('时长：', '').replace('时长', '').strip()
                elif '观看' in text or 'views' in text.lower():
                    views_match = re.search(r'(\d+\.?\d*[kK]?)次观看', text)
                    if views_match:
                        views = views_match.group(1)
                    else:
                        views = text.replace('观看：', '').replace('观看', '').strip()
            remarks = f"{duration} | {views}"
            
            # Simplified play-line extraction: find one m3u8 URL, then derive
            # the second line by swapping CDN domains
            video_urls = []
            
            # First try to locate any m3u8 URL on the page
            found_url = None
            
            # Strategy 1: the <video> tag's src attribute
            video_element = data('video#video-play_html5_api')
            if video_element:
                video_src = video_element.attr('src')
                if video_src and '.m3u8' in video_src:
                    found_url = video_src
                    print(f"从video标签找到链接: {found_url}")
            
            # Strategy 2: the <source> tag's src attribute
            if not found_url:
                source_element = data('source#video-source')
                if source_element:
                    source_src = source_element.attr('src')
                    if source_src and '.m3u8' in source_src:
                        found_url = source_src
                        print(f"从source标签找到链接: {found_url}")
            
            # Strategy 3: regex search over the raw HTML
            if not found_url:
                m3u8_matches = re.findall(r'https?://[^\s"\']+\.m3u8[^\s"\']*', html)
                if m3u8_matches:
                    found_url = m3u8_matches[0]
                    print(f"通过正则找到链接: {found_url}")
            
            # Clean the URL: unescape JSON-style slashes and force a scheme
            if found_url:
                found_url = found_url.replace('\\/', '/').replace('\\u002F', '/').replace('\\"', '')
                if not found_url.startswith('http'):
                    found_url = f"https:{found_url}" if found_url.startswith('//') else f"https://{found_url}"
                
                # The HD line (hdcdn.online) is deliberately listed first
                if 'hdcdn.online' in found_url:
                    # Found the HD line: add it, then derive the main line
                    video_urls.append(found_url)
                    second_url = found_url.replace('hdcdn.online', 'hsex.tv')
                    video_urls.append(second_url)
                    print(f"生成主线路: {second_url}")
                elif 'hsex.tv' in found_url:
                    # Found the main line: derive the HD line and put it first
                    second_url = found_url.replace('hsex.tv', 'hdcdn.online')
                    video_urls.append(second_url)  # HD line first
                    video_urls.append(found_url)   # main line second
                    print(f"生成HD线路: {second_url}")
                else:
                    # Unknown host: keep it and duplicate it as a backup
                    video_urls.append(found_url)
                    video_urls.append(found_url)  # duplicate as backup
                    print(f"复制备用线路: {found_url}")
            
            print(f"最终播放线路: {video_urls}")
            
            # Build play-from / play-url lists, HD line shown first
            play_from = []
            play_url = []
            
            for i, video_url in enumerate(video_urls):
                if 'hdcdn.online' in video_url:
                    line_name = 'HD线路'  # HD line takes priority
                elif 'hsex.tv' in video_url:
                    line_name = '主线路'
                else:
                    line_name = f'线路{i+1}'
                
                play_from.append(line_name)
                play_url.append(f'正片${video_url}')
            
            # Fallback when no play line at all was found
            if not play_from:
                play_from = ['好色TV']
                play_url = ['正片$暂无播放地址']
            
            # Guarantee two lines (duplicate a single line as a backup)
            if len(play_from) == 1:
                play_from.append(f'{play_from[0]}-备用')
                play_url.append(play_url[0])
            
            vod = {
                'vod_id': vod_id,
                'vod_name': title,
                'vod_pic': vod_pic,
                'vod_remarks': remarks,
                'vod_play_from': '$$$'.join(play_from),
                'vod_play_url': '$$$'.join(play_url)
            }
            return {'list': [vod]}
        except Exception as e:
            print(f"详情解析失败: {e}")
            import traceback
            traceback.print_exc()
            return {'list': []}

    def searchContent(self, key, quick, pg=1):
        """Search videos by keyword with pagination.

        Bug fix: the keyword was previously pre-encoded with
        ``urllib.parse.quote`` and then percent-encoded a SECOND time by
        ``requests`` when it serialized ``params``, so any non-ASCII keyword
        was double-encoded and matched nothing.  The raw keyword is now handed
        straight to ``params`` and encoded exactly once.
        """
        try:
            # Reject empty keywords up front
            if not key.strip():
                print("搜索关键词不能为空")
                return {'list': [], 'page': int(pg), 'pagecount': 1, 'limit': 0, 'total': 0}
            
            # Page 1 uses /search.htm; page N>1 uses /search-N.htm
            if int(pg) == 1:
                search_url = f"{self.host}search.htm"
            else:
                search_url = f"{self.host}search-{pg}.htm"
            
            # requests percent-encodes these values itself — do NOT pre-quote
            params = {
                'search': key.strip(),
                'sort': 'new'  # newest-first ordering
            }
            
            # Issue the search request
            resp = self.fetch(
                url=search_url,
                headers=self.headers,
                params=params,
                timeout=8
            )
            
            if resp.status_code not in (200, 302):
                print(f"搜索页面请求失败，URL：{resp.url}，状态码：{resp.status_code}")
                return {'list': [], 'page': int(pg), 'pagecount': 1, 'limit': 0, 'total': 0}
            
            # Normalize the page text to UTF-8
            html = resp.text.encode('utf-8', errors='ignore').decode('utf-8')
            data = pq(html)
            
            # Detect the "no results" page variants
            no_result_texts = ['没有找到相关视频', '无搜索结果', 'No results found', '未找到匹配内容']
            no_result = any(data(f'div:contains("{text}"), p:contains("{text}")').text() for text in no_result_texts)
            if no_result:
                print(f"搜索关键词「{key}」第{pg}页无结果")
                return {'list': [], 'page': int(pg), 'pagecount': 1, 'limit': 0, 'total': 0}
            
            # Parse the result cards (same markup as category pages)
            vlist = []
            items = data('.row .col-xs-6.col-md-3')
            for item in items.items():
                try:
                    title = item('h5').text().strip()
                    if not title:
                        continue
                    
                    style = item('.image').attr('style') or ''
                    pic_match = re.search(r'url\(["\']?([^"\']+)["\']?\)', style)
                    vod_pic = pic_match.group(1) if pic_match else ''
                    if vod_pic and not vod_pic.startswith(('http://', 'https://')):
                        vod_pic = f"{self.host.rstrip('/')}/{vod_pic.lstrip('/')}"
                    
                    desc = item('.duration').text().strip() or '未知时长'
                    
                    href = item('a').attr('href') or ''
                    if not href:
                        continue
                    vod_id = href.split('/')[-1]
                    if not vod_id.endswith('.htm'):
                        vod_id += '.htm'
                    
                    vlist.append({
                        'vod_id': vod_id,
                        'vod_name': title,
                        'vod_pic': vod_pic,
                        'vod_remarks': desc
                    })
                except Exception as e:
                    print(f"解析单条搜索结果失败：{e}（跳过该条）")
                    continue
            
            # Derive total page count from the numeric pagination links
            pagecount = 1
            try:
                pagination = data('.pagination1 li a')
                page_nums = []
                for a in pagination.items():
                    text = a.text().strip()
                    if text.isdigit():
                        page_nums.append(int(text))
                if page_nums:
                    pagecount = max(page_nums)
                print(f"搜索关键词「{key}」分页解析完成，共{pagecount}页")
            except Exception as e:
                print(f"解析分页失败（默认单页）：{e}")
                pagecount = 1
            
            # Rough total estimate: per-page count times page count
            total = len(vlist) * pagecount
            print(f"搜索关键词「{key}」第{pg}页处理完成，结果{len(vlist)}条，总页数{pagecount}")
            return {
                'list': vlist,
                'page': int(pg),
                'pagecount': pagecount,
                'limit': len(vlist),
                'total': total
            }
        except Exception as e:
            print(f"搜索功能整体异常：{e}")
            return {
                'list': [],
                'page': int(pg),
                'pagecount': 1,
                'limit': 0,
                'total': 0
            }

    def playerContent(self, flag, id, vipFlags):
        """Build the play descriptor for a stream URL (parse=1: let the app resolve it)."""
        play_headers = dict(self.headers)
        play_headers['Referer'] = self.host
        play_headers['Origin'] = self.host.rstrip('/')
        play_headers['Host'] = urllib.parse.urlparse(self.host).netloc
        return {
            'parse': 1,
            'url': id,
            'header': play_headers,
            'double': True
        }

    def localProxy(self, param):
        """Proxy an image request through the spider, resolving relative URLs."""
        try:
            url = param['url']
            # Make site-relative paths absolute against the current host
            if url and not url.startswith(('http://', 'https://')):
                url = f"{self.host.rstrip('/')}/{url.lstrip('/')}"
            
            proxy_headers = dict(self.headers)
            proxy_headers['Accept'] = 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8'
            
            res = self.fetch(url, headers=proxy_headers, timeout=10)
            return [200, res.headers.get('Content-Type', 'image/jpeg'), res.content]
        except Exception as e:
            print(f"图片代理失败: {e}")
            # Return an empty body rather than an error status
            return [200, 'image/jpeg', b'']

    def fetch_with_retry(self, url, retry=2, timeout=5):
        """GET ``url`` up to ``retry + 1`` times, returning the first good response.

        A response with status 200/301/302 counts as good.  On total failure a
        stub object mimicking the response interface is returned so callers can
        still read ``.text`` / ``.status_code`` / ``.headers`` / ``.url``
        without crashing.
        """
        for attempt in range(retry + 1):
            try:
                resp = self.fetch(url, headers=self.headers, timeout=timeout)
                if resp.status_code in (200, 301, 302):
                    return resp
                print(f"请求{url}返回状态码{resp.status_code}，重试中...")
            except Exception as e:
                print(f"第{attempt+1}次请求{url}失败: {e}")
            if attempt < retry:
                time.sleep(0.5)  # brief back-off between attempts
        # Robustness fix: the old stub exposed only 'text' and 'status_code',
        # unlike self.fetch's failure stub — callers touching .headers or .url
        # would hit AttributeError. Expose the same attribute set.
        return type('obj', (object,), {
            'text': '',
            'status_code': 404,
            'headers': {},
            'url': url
        })

    def fetch(self, url, headers=None, timeout=5, method='GET', params=None):
        """Issue an HTTP request, returning a Response or a safe stub on error.

        Any network failure yields a stub exposing ``text`` / ``status_code`` /
        ``headers`` / ``url`` so callers never have to catch connection errors.
        Unknown HTTP verbs fall back to GET.
        """
        headers = headers or self.headers
        params = params or {}
        try:
            # Request the target URL directly
            if method.upper() == 'GET':
                resp = requests.get(
                    url,
                    headers=headers, 
                    timeout=timeout, 
                    allow_redirects=True,
                    params=params
                )
            elif method.upper() == 'HEAD':
                # HEAD probes do not follow redirects (status is inspected)
                resp = requests.head(
                    url,
                    headers=headers, 
                    timeout=timeout, 
                    allow_redirects=False,
                    params=params
                )
            else:
                # Any other verb degrades to a plain GET
                resp = requests.get(
                    url,
                    headers=headers, 
                    timeout=timeout, 
                    allow_redirects=True,
                    params=params
                )
            
            # Pick an encoding to avoid mojibake in Chinese pages.
            # NOTE(review): when a charset IS declared this overrides it with
            # the sniffed apparent_encoding — presumably to cope with
            # mislabelled pages; confirm this inversion is intentional.
            if 'charset' in resp.headers.get('Content-Type', '').lower():
                resp.encoding = resp.apparent_encoding
            else:
                resp.encoding = 'utf-8'
            return resp
        except Exception as e:
            print(f"网络请求失败({url}): {e}")
            # Returns the created class object itself (not an instance);
            # attribute access on it still works for the keys below.
            return type('obj', (object,), {
                'text': '', 
                'status_code': 500, 
                'headers': {},
                'url': url
            })