from flask import Flask, request, jsonify
import cloudscraper
import json
import re
from scrapy import Selector
import os

# Flask application object; the @app.route handlers in this module register on it.
app = Flask(__name__)

def _decode_snapsave(h, u, n, t, e, r):
    """Undo the ``eval(function(h,u,n,t,e,r){...})`` packing of a response.

    Two layouts are handled, selected by the type of ``t``:

    * ``t`` is a string: plain substitution. Every character of ``h`` that
      occurs in ``n`` is swapped for the character at the same position in
      ``t``; characters without a counterpart are kept as they are.
    * otherwise: ``h`` is a run of tokens separated by ``n[e]``. Each token
      has its characters replaced by their index in ``n``, is read as a
      base-``e`` integer, shifted down by ``t`` and turned into one character.

    ``u`` and ``r`` are accepted only so the packed argument tuple can be
    passed through unchanged; they are not used.

    Malformed input surfaces as the exception raised by the underlying
    string/int operation (ValueError, TypeError, IndexError, ...).
    """
    if isinstance(t, str):
        # Remember only the first position of each alphabet character, which
        # is what a left-to-right search of ``n`` would find.
        first_index = {}
        for position, symbol in enumerate(n):
            first_index.setdefault(symbol, position)

        def substitute(char):
            position = first_index.get(char)
            if position is None or position >= len(t):
                return char
            return t[position]

        return "".join(substitute(char) for char in h)

    separator = n[e]
    pieces = []
    for token in h.split(separator):
        if not token:
            continue
        digits = token
        # Sequential replacement mirrors the packer's own loop:
        # s = s.replace(new RegExp(n[j], "g"), j)
        for position, symbol in enumerate(n):
            digits = digits.replace(symbol, str(position))
        pieces.append(chr(int(digits, e) - t))
    decoded = "".join(pieces)

    # NOTE(review): the JS packer is understood to finish with
    # decodeURIComponent(escape(r)), i.e. the characters built above are UTF-8
    # bytes. Re-decode on a best-effort basis; pure ASCII is unaffected and
    # anything that is not valid UTF-8 is returned untouched.
    try:
        return decoded.encode("latin-1").decode("utf-8")
    except UnicodeError:
        return decoded


def _extract_snapsave_links(sel, site_url):
    """Collect ``{"quality", "url"}`` entries from a parsed result page.

    The result table is tried first; when it yields nothing, the
    ``a.download-items__btn`` anchors are used and reported as quality "hd".
    Site-relative links are prefixed with ``site_url`` and ``get_progress``
    links are skipped.
    """
    links = []
    for row in sel.css('table.table tbody tr'):
        quality = row.css('td.video-quality::text').get() or ""
        href = row.css('td a.button::attr(href)').get()
        if not href:
            # Some rows carry the link in a button's onclick handler instead.
            onclick = row.css('td button::attr(onclick)').get()
            if onclick and "window.open('" in onclick:
                href = onclick.split("window.open('")[1].split("'")[0]
        if not href:
            continue
        if href.startswith('/'):
            href = f"{site_url}{href}"
        if "get_progress" in href:
            continue
        links.append({"quality": quality.strip().lower(), "url": href})

    if links:
        return links

    for href in sel.css('a.download-items__btn::attr(href)').getall():
        if href and "get_progress" not in href:
            links.append({
                "quality": "hd",
                "url": href if href.startswith('http') else f"{site_url}{href}"
            })
    return links


def scrape_with_snapsave_engine(url, site_url="https://snapsave.app", timeout=30):
    """Resolve a media URL through a SnapSave-style downloader site.

    Posts ``url`` to ``{site_url}/action.php?lang=en``, unpacks the response
    when it arrives as an obfuscated ``eval(function(h,u,n,t,e,r){...})``
    script, and extracts the download links from the resulting markup.

    Args:
        url: Address of the media page to resolve.
        site_url: Base URL of the downloader site (no trailing slash).
        timeout: Seconds to wait for the site before giving up.

    Returns:
        On success a dict with ``title``, ``thumbnail``, ``download_url`` and
        ``source``. On failure a dict with ``error``; it additionally carries
        ``is_private: True`` when the site reports a private video, or
        ``details`` (a traceback) when an unexpected exception occurred.
        This function does not raise.
    """
    try:
        scraper = cloudscraper.create_scraper()
        action_url = f"{site_url}/action.php?lang=en"

        headers = {
            "Referer": f"{site_url}/",
            "Origin": site_url,
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        }

        # Without a timeout a stalled mirror would block this worker forever.
        res = scraper.post(action_url, data={"url": url}, headers=headers, timeout=timeout)
        if res.status_code != 200:
            return {"error": f"Site {site_url} returned status {res.status_code}"}

        html_content = res.text

        if "download-items" in html_content:
            # The markup is already present; nothing to unpack.
            html_result = html_content
        else:
            match = re.search(r'eval\(function\(h,u,n,t,e,r\)\{.*?\}\((.*?)\)\)', html_content)
            if not match:
                return {"error": "Could not find download links or obfuscated script in response"}

            import ast
            try:
                # Parenthesised so the comma-separated arguments parse as a tuple.
                params = ast.literal_eval(f"({match.group(1)})")
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
                return {"error": f"Failed to parse decoder params: {str(e)}"}

            # The decoder takes all six packed arguments (h, u, n, t, e, r).
            if not isinstance(params, (list, tuple)) or len(params) < 6:
                found = len(params) if isinstance(params, (list, tuple)) else 'not a list'
                return {"error": f"Invalid decoder params count: {found}"}

            try:
                html_result = _decode_snapsave(*params[:6])
            except (ArithmeticError, AttributeError, LookupError, TypeError, ValueError) as err:
                return {"error": f"Decoding failed: {str(err)}"}

            # The unpacked payload may be the site's "private video" notice.
            if "is private" in html_result or "Private Video Downloader" in html_result:
                return {"error": "This video is private. Please use a public video URL or the private downloader tool.", "is_private": True}

        # A script without the expected markup is most likely an error handler.
        if html_result.strip().startswith("if(") and "download-items" not in html_result:
            return {"error": f"Site returned script instead of content: {html_result[:100]}..."}

        sel = Selector(text=html_result)
        links = _extract_snapsave_links(sel, site_url)

        if not links:
            alert = sel.css('.alert::text').get() or sel.css('#alert::text').get()
            if alert:
                return {"error": f"Site message: {alert.strip()}"}
            return {"error": "Could not extract any download links from the site results. The video might be private or unavailable."}

        # Prefer the first entry whose quality label looks like HD; otherwise
        # fall back to the first link found.
        preferred = ('hd', '720', '1080', 'render', 'best')
        best_link = next(
            (link for link in links if any(tag in link['quality'] for tag in preferred)),
            links[0],
        )

        title = sel.css('h3::text').get() or sel.css('div.content h3::text').get() or "Video Download"
        thumbnail = sel.css('div.thumbnail img::attr(src)').get() or ""

        return {
            "title": title.strip(),
            "thumbnail": thumbnail,
            "download_url": best_link['url'],
            "source": "facebook" if "snapsave" in site_url else "instagram"
        }

    except Exception as e:
        import traceback
        # NOTE(review): "details" carries a full traceback and the /scrape
        # route returns this dict to the client; consider logging it instead.
        return {"error": f"Scraper engine failed: {str(e)}", "details": traceback.format_exc()}

def scrape_facebook_fdown(url, timeout=30):
    """Resolve a Facebook video through fdown.net.

    Args:
        url: Address of the Facebook video page.
        timeout: Seconds to wait for fdown.net before giving up.

    Returns:
        A dict with ``title``, ``thumbnail``, ``download_url`` and ``source``
        when a link is found (the ``#hdlink`` anchor is preferred over
        ``#sdlink``), otherwise a dict with ``error``. This function does not
        raise.
    """
    request_headers = {
        "Referer": "https://fdown.net/",
        "Origin": "https://fdown.net",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }
    try:
        scraper = cloudscraper.create_scraper()
        # Without a timeout a stalled upstream would block this worker forever.
        res = scraper.post(
            "https://fdown.net/download.php",
            data={"URLz": url},
            headers=request_headers,
            timeout=timeout,
        )

        if res.status_code != 200:
            return {"error": f"FDown returned status {res.status_code}"}

        sel = Selector(text=res.text)
        download_url = sel.css('#hdlink::attr(href)').get() or sel.css('#sdlink::attr(href)').get()

        if download_url:
            return {
                "title": "Facebook Video",
                "thumbnail": "",
                "download_url": download_url,
                "source": "facebook"
            }

        # No link: tell a bot-challenge page apart from an ordinary miss.
        if "Verify you are human" in res.text:
            return {"error": "FDown blocked by Cloudflare challenge."}

        return {"error": "Could not extract link from FDown."}
    except Exception as e:
        return {"error": f"FDown scraper failed: {str(e)}"}

def scrape_facebook(fb_url):
    """Resolve a Facebook video, trying snapsave.app first and FDown second.

    The snapsave.app result is returned as-is when it succeeded or when it
    reports a private video. Otherwise FDown is consulted; its result is
    used only if it succeeded, and the original snapsave.app error is
    returned when both fail.
    """
    primary = scrape_with_snapsave_engine(fb_url, "https://snapsave.app")
    if "error" not in primary or primary.get("is_private"):
        return primary

    fallback = scrape_facebook_fdown(fb_url)
    return primary if "error" in fallback else fallback

def scrape_instagram(ig_url):
    """Resolve an Instagram URL by trying several SnapSave-style mirrors in turn.

    Each mirror is queried through ``scrape_with_snapsave_engine``; the first
    successful result is returned. Trying stops early when a mirror reports
    the content as private.

    Returns:
        The successful result dict with ``source`` set to ``"instagram"``,
        or ``{"error": ...}`` describing the last failure. The error dict
        also carries ``is_private: True`` when a mirror reported a private
        video.
    """
    engines = (
        "https://snapinsta.app",
        "https://snapsave.app",
        "https://saveclip.app",
    )

    print(f"Scraping Instagram: {ig_url}")
    last_error = "All Instagram scrapers failed"
    is_private = False

    for engine in engines:
        print(f"Attempting {engine}...")
        try:
            result = scrape_with_snapsave_engine(ig_url, engine)
        except Exception as e:
            print(f"Engine {engine} failed: {str(e)}")
            last_error = str(e) or last_error
            continue

        if "error" not in result:
            # The engine derives "source" from the mirror's hostname, which
            # labels snapsave.app results "facebook"; this call is always
            # for Instagram content.
            return {**result, "source": "instagram"}

        last_error = result.get("error", last_error)
        if result.get("is_private"):
            # Another mirror will not help with private content.
            is_private = True
            break

    failure = {"error": f"Instagram fetch failed: {last_error}"}
    if is_private:
        failure["is_private"] = True
    return failure

def scrape_youtube(yt_url, timeout=30):
    """Resolve a YouTube URL through the save-from.net convert API.

    Args:
        yt_url: Address of the YouTube video.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        A dict with ``title``, ``thumbnail``, ``download_url`` and ``source``
        on success, otherwise a dict with ``error``. This function does not
        raise.
    """
    try:
        scraper = cloudscraper.create_scraper()
        api_url = "https://save-from.net/api/convert"
        # Without a timeout a stalled upstream would block this worker forever.
        res = scraper.post(api_url, data={"url": yt_url}, timeout=timeout)
        if res.status_code != 200:
            return {"error": f"YouTube API returned status {res.status_code}"}

        data = res.json()
        if not isinstance(data, dict) or not data.get("url"):
            return {"error": "Could not extract YouTube download link."}

        # "meta" may be absent or null; neither should discard a usable link.
        meta = data.get("meta") or {}
        return {
            "title": meta.get("title", "YouTube Video"),
            "thumbnail": meta.get("thumbnail", ""),
            # NOTE(review): assumes "url" is a list of objects with a "url"
            # key, as the original indexing does; other shapes end up in the
            # except branch below.
            "download_url": data["url"][0]["url"],
            "source": "youtube"
        }
    except Exception as e:
        return {"error": f"YouTube scraper failed: {str(e)}"}

# Hostnames accepted as Pinterest: pinterest.<tld>, pinterest.co.<cc>,
# pinterest.com.<cc>, with any number of leading sub-domain labels.
_PINTEREST_HOST_RE = re.compile(r"(?:[a-z0-9-]+\.)*pinterest\.(?:[a-z]{2,}|com?\.[a-z]{2})")


def _is_pinterest_url(candidate):
    """Return True when ``candidate`` is an http(s) URL on a Pinterest host or pin.it."""
    from urllib.parse import urlsplit

    if not isinstance(candidate, str):
        return False
    try:
        parts = urlsplit(candidate.strip())
        host = (parts.hostname or "").rstrip(".")
    except ValueError:
        # urlsplit rejects some malformed netlocs (e.g. a broken IPv6 literal).
        return False
    if parts.scheme not in ("http", "https"):
        return False
    return host == "pin.it" or _PINTEREST_HOST_RE.fullmatch(host) is not None


def scrape_pinterest(pin_url, timeout=30):
    """Fetch a Pinterest page and extract its video URL.

    The page itself is downloaded by this server, so the URL is first checked
    against Pinterest hostnames; otherwise a caller could make the server
    request arbitrary (including internal) addresses.

    Args:
        pin_url: Address of the Pinterest pin.
        timeout: Seconds to wait for the page before giving up.

    Returns:
        A dict with ``title``, ``thumbnail``, ``download_url`` and ``source``
        on success, otherwise a dict with ``error``. This function does not
        raise.
    """
    if not _is_pinterest_url(pin_url):
        return {"error": "URL is not a Pinterest link."}

    try:
        scraper = cloudscraper.create_scraper()
        res = scraper.get(pin_url.strip(), timeout=timeout)
        sel = Selector(text=res.text)

        # Look for a <video src> first, then the og:video:secure_url meta tag.
        video_url = sel.xpath('//video/@src').get() or sel.xpath('//meta[@property="og:video:secure_url"]/@content').get()

        if not video_url:
            return {"error": "No video found on this Pinterest page."}

        return {
            "title": sel.xpath('//meta[@property="og:title"]/@content').get() or "Pinterest Media",
            "thumbnail": sel.xpath('//meta[@property="og:image"]/@content').get() or "",
            "download_url": video_url,
            "source": "pinterest"
        }
    except Exception as e:
        return {"error": f"Pinterest scraper failed: {str(e)}"}

@app.route('/scrape', methods=['POST'])
def scrape():
    """Resolve a media URL for the requested platform.

    Expects a JSON object with ``url`` and an optional ``type`` (defaults to
    ``instagram``). Responds with the scraper's result dict: 400 for a
    malformed request or unknown platform, 500 when the scraper reports an
    error, 200 otherwise.
    """
    data = request.get_json()
    # The body must be a JSON object; a bare string or list would pass an
    # "in" test and then fail on data['url'].
    if not isinstance(data, dict) or 'url' not in data:
        return jsonify({"error": "Missing 'url' in JSON body"}), 400

    url = data['url']
    if not isinstance(url, str) or not url.strip():
        return jsonify({"error": "'url' must be a non-empty string"}), 400

    req_type = str(data.get('type', 'instagram')).lower().strip()

    print(f"Scrape Request: {req_type} -> {url}")

    # Accepted aliases for each platform and the scraper that serves them.
    handlers = (
        (('instagram', 'insta', 'ig'), scrape_instagram),
        (('facebook', 'fb'), scrape_facebook),
        (('youtube', 'yt'), scrape_youtube),
        (('pinterest', 'pin'), scrape_pinterest),
    )
    handler = next((fn for aliases, fn in handlers if req_type in aliases), None)
    if handler is None:
        return jsonify({"error": f"Unsupported platform: {req_type}"}), 400

    result = handler(url)
    if "error" in result:
        return jsonify(result), 500

    return jsonify(result)

def _unescape_embedded_url(raw):
    """Undo the JSON escaping of ``&`` and ``/`` on a URL lifted from page source."""
    # Script blobs write "&" as \u0026 and "/" as \/ .
    return raw.replace("\\u0026", "&").replace("\\/", "/")


@app.route('/scrape-private', methods=['POST'])
def scrape_private():
    """Extract a video link from page source supplied by the caller.

    Expects a JSON object with ``source`` holding the page's HTML. The
    link is looked up, in order, in the ``og:video`` meta tag, in a
    ``"video_url":"..."`` pair, and finally as the longest ``.mp4`` URL in
    the text. Responds 400 when no source is given and 500 when no link can
    be found.
    """
    data = request.get_json()
    # A missing or non-object body is treated like a missing source instead
    # of failing on data.get.
    source = data.get('source', '') if isinstance(data, dict) else ''
    if not source or not isinstance(source, str):
        return jsonify({"error": "No source provided"}), 400

    sel = Selector(text=source)

    video_url = sel.xpath('//meta[@property="og:video"]/@content').get()

    if not video_url:
        match = re.search(r'"video_url":"([^"]+)"', source)
        if match:
            video_url = _unescape_embedded_url(match.group(1))

    if not video_url:
        # Accept both plain and JSON-escaped (https:\/\/) links, and links
        # that end exactly at ".mp4"; stop at quotes, whitespace and tags.
        mp4_links = re.findall(r'https:(?:\\?/){2}[^"\'\s<>]+?\.mp4[^"\'\s<>]*', source)
        if mp4_links:
            # The longest candidate is taken, as before.
            video_url = _unescape_embedded_url(max(mp4_links, key=len))

    if video_url:
        thumbnail = sel.xpath('//meta[@property="og:image"]/@content').get() or ""
        return jsonify({
            "title": "Private Instagram Video",
            "thumbnail": thumbnail,
            "download_url": video_url,
            "source": "instagram_private"
        })

    return jsonify({"error": "Could not extract video link from page source"}), 500

# Namecheap Passenger WSGI entry point: expose the Flask app under the
# module-level name `application`.
application = app

# Direct execution (python <file>) starts Flask's built-in server, listening
# on every network interface at port 5000.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
