<?php

/**
 * Fetch artist information from Tunefind through ScraperAPI using cURL.
 *
 * The page is requested with JavaScript rendering enabled. The embedded
 * `window.__remixContext` payload is preferred; when its `apiData` entry
 * cannot be extracted, the function falls back to scraping the rendered HTML.
 *
 * The ScraperAPI key is read from the SCRAPERAPI_KEY environment variable
 * when it is set; otherwise the key embedded below is used.
 *
 * @route GET /tunefind/artist/{artist_name}
 *
 * @param string $artist_name Artist slug used as the last path segment of the Tunefind URL.
 * @return array Result envelope. Always contains 'success' (bool). On success it
 *               also contains 'artist', 'data' and 'source' ('__remixContext' or
 *               'html_parsing'; the latter adds 'debug'). On failure it contains
 *               'message' plus either 'error' (exception text) or 'data' => null
 *               and 'debug'.
 */
function getArtistInfo($artist_name) {
    try {
        error_log("Fetching Tunefind data for artist: {$artist_name}");

        // NOTE(review): this secret is committed to source control. Prefer
        // supplying it via the environment; the literal is kept only so that
        // existing deployments keep working.
        $scraperApiKey = getenv('SCRAPERAPI_KEY');
        if ($scraperApiKey === false || $scraperApiKey === '') {
            $scraperApiKey = '3bd075168a9f82f693132c2db5eba04e';
        }

        // Encode the name as a single path segment so characters such as
        // spaces, '/', '?', '#' or '..' cannot alter the requested URL.
        $targetUrl = 'https://www.tunefind.com/artist/' . rawurlencode((string) $artist_name);

        // render=true asks ScraperAPI to execute JavaScript before returning.
        // NOTE(review): wait_for=10000 is intended as a 10 second render wait;
        // confirm the parameter name against the ScraperAPI documentation.
        // HTTPS keeps the API key out of clear-text traffic.
        $url = 'https://api.scraperapi.com/?' . http_build_query([
            'api_key' => $scraperApiKey,
            'url' => $targetUrl,
            'render' => 'true',
            'wait_for' => 10000
        ], '', '&');

        error_log("Using ScraperAPI to fetch: {$targetUrl} (waiting 10 seconds for JS rendering)");

        $ch = curl_init();
        if ($ch === false) {
            throw new Exception("cURL Error: unable to initialise cURL handle");
        }

        // ScraperAPI handles browser emulation, so only transport options are set.
        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_TIMEOUT => 120, // rendering with JavaScript is slow
            CURLOPT_SSL_VERIFYPEER => true,
            CURLOPT_SSL_VERIFYHOST => 2,
            CURLOPT_ENCODING => '' // accept every encoding cURL supports
        ]);

        // Collect everything needed from the handle, then release it exactly
        // once regardless of the outcome.
        $html = curl_exec($ch);
        $curlErrno = curl_errno($ch);
        $curlError = curl_error($ch);
        $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        if ($html === false || $curlErrno !== 0) {
            throw new Exception("cURL Error: {$curlError}");
        }

        if ($httpCode !== 200) {
            throw new Exception("HTTP Error: {$httpCode}");
        }

        if (empty($html) || trim($html) === '') {
            throw new Exception("Empty response received");
        }

        error_log("Successfully fetched HTML (" . strlen($html) . " bytes)");

        // Preferred source: the __remixContext object embedded in the page.
        $remixContext = extractRemixContextFromHTML($html);

        $apiData = null;
        if ($remixContext) {
            error_log("✅ Found __remixContext, extracting apiData...");

            // Path: state.loaderData['routes/artist.$artistName'].apiData
            // (single quotes: "$artistName" is part of the literal route key).
            $routeKey = 'routes/artist.$artistName';
            if (isset($remixContext['state']['loaderData'][$routeKey]['apiData'])) {
                $apiData = $remixContext['state']['loaderData'][$routeKey]['apiData'];
                error_log("✅ Successfully extracted apiData from __remixContext");
            } else {
                error_log("⚠️ __remixContext found but apiData path not found");
            }
        } else {
            error_log("⚠️ No __remixContext found in rendered HTML");
        }

        if ($apiData) {
            error_log("Successfully extracted Tunefind data from __remixContext for artist: {$artist_name}");

            return [
                'success' => true,
                'artist' => $artist_name,
                'data' => $apiData,
                'source' => '__remixContext'
            ];
        }

        // Fallback: scrape the rendered markup. These two DOM passes are only
        // needed on this path, so they are deferred until here.
        $artistData = parseArtistData($html);
        $debugInfo = getDebugInfo($html);

        if (empty($artistData['title']) && empty($artistData['full_html'])) {
            error_log("No artist data found for: {$artist_name}");

            return [
                'success' => false,
                'message' => 'Artist data not found',
                'data' => null,
                'debug' => $debugInfo
            ];
        }

        error_log("Returning HTML parsed data for artist: {$artist_name}");

        return [
            'success' => true,
            'artist' => $artist_name,
            'data' => $artistData,
            'source' => 'html_parsing',
            'debug' => $debugInfo
        ];

    } catch (Exception $e) {
        error_log("Error fetching Tunefind data for {$artist_name}: " . $e->getMessage());

        return [
            'success' => false,
            'message' => 'Error fetching artist data',
            'error' => $e->getMessage()
        ];
    }
}

/**
 * Parse artist data from rendered Tunefind HTML.
 *
 * Looks for the first <div> whose class attribute contains both "absolute"
 * and "bottom-6", and reads the heading and the last paragraph inside it.
 * When no title is found there, the first <h1> anywhere in the document is
 * used instead.
 *
 * @param string $html UTF-8 HTML document.
 * @return array Keys: 'artist_name' (always null here), 'title', 'stats',
 *               'songs_count', 'appearances_count', 'full_html'. Values stay
 *               null when the corresponding element is not found.
 */
function parseArtistData($html) {
    $data = [
        'artist_name' => null,
        'title' => null,
        'stats' => null,
        'songs_count' => null,
        'appearances_count' => null,
        'full_html' => null
    ];

    // DOMDocument::loadHTML() rejects an empty source (ValueError on PHP 8),
    // and there is nothing to extract from it anyway.
    if (!is_string($html) || trim($html) === '') {
        return $data;
    }

    // Collect libxml warnings internally while parsing, then restore the
    // caller's setting so this function does not change global state.
    $previousErrorMode = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    // Turn every non-ASCII character into a numeric entity so the parser sees
    // pure ASCII. This replaces mb_convert_encoding(..., 'HTML-ENTITIES'),
    // which is deprecated as of PHP 8.2.
    $loaded = $dom->loadHTML(
        mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8')
    );
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    if ($loaded === false) {
        return $data;
    }

    $xpath = new DOMXPath($dom);

    // First div carrying both the "absolute" and "bottom-6" classes.
    $targetDivs = $xpath->query("//div[contains(@class, 'absolute') and contains(@class, 'bottom-6')]");

    if ($targetDivs->length > 0) {
        $targetDiv = $targetDivs->item(0);
        $data['full_html'] = $dom->saveHTML($targetDiv);

        // Title: first <h1> inside the target div.
        $headings = $xpath->query(".//h1", $targetDiv);
        if ($headings->length > 0) {
            $data['title'] = trim($headings->item(0)->textContent);
        }

        // Stats: the last <p> inside the target div.
        $paragraphs = $xpath->query(".//p", $targetDiv);
        if ($paragraphs->length > 0) {
            $lastParagraph = $paragraphs->item($paragraphs->length - 1);
            $data['stats'] = trim($lastParagraph->textContent);

            // Expected form: "<n> Song(s) | <m> Appearance(s)".
            if (preg_match('/(\d+)\s*Songs?\s*\|\s*(\d+)\s*Appearances?/i', $data['stats'], $matches)) {
                $data['songs_count'] = (int)$matches[1];
                $data['appearances_count'] = (int)$matches[2];
            }
        }
    }

    // Fallback: first <h1> anywhere on the page.
    if (empty($data['title'])) {
        $headings = $xpath->query("//h1");
        if ($headings->length > 0) {
            $data['title'] = trim($headings->item(0)->textContent);
        }
    }

    return $data;
}

/**
 * Collect diagnostic information about a rendered HTML document.
 *
 * @param string $html UTF-8 HTML document.
 * @return array Keys: 'title' (document <title> text or 'No title'),
 *               'bodyLength' (byte length of the serialised <body>, 0 when
 *               there is none), 'hasH1', 'h1Text', 'targetDiv' (first 500
 *               bytes of the "absolute bottom-6" div), 'allH1s',
 *               'allParagraphs' (first five <p> texts), 'scriptTags' (one
 *               summary per <script>) and 'remixContext' (result of
 *               extractRemixContextFromScript() for the first script that
 *               mentions __remixContext, otherwise null).
 */
function getDebugInfo($html) {
    $debug = [
        'title' => 'No title',
        'bodyLength' => 0,
        'hasH1' => false,
        'h1Text' => null,
        'targetDiv' => null,
        'allH1s' => [],
        'allParagraphs' => [],
        'scriptTags' => [],
        'remixContext' => null
    ];

    // DOMDocument::loadHTML() rejects an empty source (ValueError on PHP 8);
    // report the empty skeleton instead.
    if (!is_string($html) || trim($html) === '') {
        return $debug;
    }

    // Collect libxml warnings internally while parsing, then restore the
    // caller's setting so this function does not change global state.
    $previousErrorMode = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    // Numeric-entity encode non-ASCII characters so the parser sees pure
    // ASCII. Replaces mb_convert_encoding(..., 'HTML-ENTITIES'), deprecated
    // as of PHP 8.2. A side effect is that script bodies (never entity-decoded
    // by the parser) stay ASCII, which keeps the byte-wise substr() previews
    // below from splitting a multibyte character.
    $loaded = $dom->loadHTML(
        mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8')
    );
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    if ($loaded === false) {
        return $debug;
    }

    $xpath = new DOMXPath($dom);

    // Document title.
    $titleElements = $dom->getElementsByTagName('title');
    if ($titleElements->length > 0) {
        $debug['title'] = $titleElements->item(0)->textContent;
    }

    // Body size. Guard against a missing <body>: saveHTML(null) would
    // serialise the whole document instead.
    $body = $dom->getElementsByTagName('body')->item(0);
    if ($body !== null) {
        $debug['bodyLength'] = strlen((string) $dom->saveHTML($body));
    }

    // Headings: every <h1>, plus the first one on its own.
    $h1Elements = $dom->getElementsByTagName('h1');
    $debug['hasH1'] = $h1Elements->length > 0;
    foreach ($h1Elements as $h1) {
        $debug['allH1s'][] = trim($h1->textContent);
    }
    if ($debug['hasH1']) {
        $debug['h1Text'] = $debug['allH1s'][0];
    }

    // First div carrying both the "absolute" and "bottom-6" classes.
    $targetDivs = $xpath->query("//div[contains(@class, 'absolute') and contains(@class, 'bottom-6')]");
    if ($targetDivs->length > 0) {
        $debug['targetDiv'] = substr($dom->saveHTML($targetDivs->item(0)), 0, 500);
    }

    // First five paragraphs.
    foreach ($dom->getElementsByTagName('p') as $paragraph) {
        if (count($debug['allParagraphs']) >= 5) {
            break;
        }
        $debug['allParagraphs'][] = trim($paragraph->textContent);
    }

    // One summary entry per <script> element.
    foreach ($dom->getElementsByTagName('script') as $index => $script) {
        $content = $script->textContent;
        $hasRemixContext = strpos($content, '__remixContext') !== false;

        $debug['scriptTags'][] = [
            'index' => $index,
            'id' => $script->getAttribute('id') ?: 'no-id',
            'type' => $script->getAttribute('type') ?: 'no-type',
            'src' => $script->getAttribute('src') ?: 'inline',
            'hasRemix' => stripos($content, 'remix') !== false,
            'hasContext' => stripos($content, 'context') !== false,
            'hasRemixContext' => $hasRemixContext,
            'hasWindow' => strpos($content, 'window.') !== false,
            // Longer preview for the script of interest.
            'contentPreview' => $hasRemixContext ? substr($content, 0, 2000) : substr($content, 0, 200),
            'contentLength' => strlen($content),
            'hasJSON' => strpos($content, '{') !== false && strpos($content, '}') !== false,
            // preg_match() result, i.e. 1 or 0 rather than a boolean.
            'hasWindowAssignment' => preg_match('/window\.\w+\s*=/', $content)
        ];

        // Only the first script mentioning __remixContext is decoded.
        if ($hasRemixContext && is_null($debug['remixContext'])) {
            $debug['remixContext'] = extractRemixContextFromScript($content);
        }
    }

    return $debug;
}

/**
 * Extract the __remixContext object from the text of a single script.
 *
 * Several regular expressions are tried in order. The first capture that
 * decodes as JSON wins; a capture that fails to decode no longer aborts the
 * search, so a later, tighter pattern can still succeed (for example when
 * another assignment follows the context in the same script).
 *
 * @param string $scriptContent Text content of a <script> element.
 * @return array|null The decoded context on success. An array with an 'error'
 *                    key when a candidate was found but no pattern produced
 *                    valid JSON ('errorMessage' and 'preview' describe the
 *                    first failed candidate), or when only the keyword was
 *                    found ('contentLength' and 'fullContent'). Null when the
 *                    script does not appear to contain the context at all.
 */
function extractRemixContextFromScript($scriptContent) {
    // Ordered from most to least specific.
    $patterns = [
        '/window\.__remixContext\s*=\s*({[\s\S]*?});?\s*(?:\/\/|$)/',
        '/window\.__remixContext\s*=\s*({[\s\S]*});\s*$/m',
        '/__remixContext\s*=\s*({[\s\S]*?});/',
        '/=\s*({[\s\S]*})\s*;?\s*$/'
    ];

    // Details of the first capture that failed to decode; reported only if
    // every pattern fails.
    $firstFailure = null;

    foreach ($patterns as $pattern) {
        // preg_match() yields 0 on no match and false on a PCRE error
        // (e.g. backtrack limit); both mean "try the next pattern".
        if (preg_match($pattern, $scriptContent, $matches) !== 1 || !isset($matches[1])) {
            continue;
        }

        $jsonString = $matches[1];
        $decoded = json_decode($jsonString, true);

        if (json_last_error() === JSON_ERROR_NONE) {
            error_log("✅ Successfully parsed __remixContext JSON");
            return $decoded;
        }

        $errorMessage = json_last_error_msg();
        error_log("❌ JSON parse error: " . $errorMessage);

        if ($firstFailure === null) {
            $firstFailure = [
                'error' => 'Failed to parse JSON',
                'errorMessage' => $errorMessage,
                // mb_strcut() cuts on a character boundary so the preview
                // stays valid UTF-8 (at most 1000 bytes).
                'preview' => mb_strcut($jsonString, 0, 1000, 'UTF-8')
            ];
        }
    }

    if ($firstFailure !== null) {
        return $firstFailure;
    }

    // The keyword is present but no pattern captured anything.
    if (strpos($scriptContent, '__remixContext') !== false) {
        return [
            'error' => 'Found __remixContext keyword but could not extract JSON',
            'contentLength' => strlen($scriptContent),
            'fullContent' => $scriptContent
        ];
    }

    return null;
}

/**
 * Extract the __remixContext object from a rendered HTML document.
 *
 * Every <script> element that mentions __remixContext is examined in document
 * order; the first one that yields valid JSON is returned.
 *
 * @param string $html UTF-8 HTML document.
 * @return array|null The decoded context, or null when no script yields valid JSON.
 */
function extractRemixContextFromHTML($html) {
    // DOMDocument::loadHTML() rejects an empty source (ValueError on PHP 8).
    if (!is_string($html) || trim($html) === '') {
        error_log("❌ No __remixContext found in any script tags");
        return null;
    }

    // Collect libxml warnings internally while parsing, then restore the
    // caller's setting so this function does not change global state.
    $previousErrorMode = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    // Script bodies are raw text: the HTML parser does not decode entities
    // inside them. Pre-encoding the page to HTML entities therefore leaked
    // sequences such as "&eacute;" into the JSON strings. Instead, declare the
    // charset up front so the parser reads the UTF-8 bytes as they are.
    $loaded = $dom->loadHTML(
        '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' . $html
    );
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    if ($loaded === false) {
        error_log("❌ No __remixContext found in any script tags");
        return null;
    }

    $scriptElements = $dom->getElementsByTagName('script');

    error_log("🔍 Searching through " . $scriptElements->length . " script tags for __remixContext...");

    // Ordered from most to least specific.
    $patterns = [
        '/window\.__remixContext\s*=\s*({[\s\S]*?});?\s*(?:\/\/|$)/',
        '/window\.__remixContext\s*=\s*({[\s\S]*});\s*$/m',
        '/__remixContext\s*=\s*({[\s\S]*?});?$/m',
        '/=\s*({[\s\S]*})\s*;?\s*$/m'
    ];

    $keywordSeen = false;

    foreach ($scriptElements as $index => $script) {
        $content = $script->textContent;

        if (strpos($content, '__remixContext') === false) {
            continue;
        }

        $keywordSeen = true;
        error_log("📍 Found script tag #{$index} with __remixContext keyword (" . strlen($content) . " chars)");

        foreach ($patterns as $patternIndex => $pattern) {
            // 0 means no match, false means a PCRE error; either way move on.
            if (preg_match($pattern, $content, $matches) !== 1 || !isset($matches[1])) {
                continue;
            }

            error_log("✅ Matched with pattern #{$patternIndex}");

            $jsonString = $matches[1];
            $decoded = json_decode($jsonString, true);

            if (json_last_error() === JSON_ERROR_NONE) {
                $keys = is_array($decoded) ? count($decoded) : 0;
                error_log("✅ Successfully parsed __remixContext JSON ({$keys} top-level keys)");
                return $decoded;
            }

            error_log("❌ JSON parse error: " . json_last_error_msg());
            error_log("Preview: " . substr($jsonString, 0, 500));
        }

        // This script only referenced the keyword (or held something that is
        // not valid JSON). Keep looking: a later script may hold the payload.
        error_log("⚠️ Found __remixContext keyword but could not extract valid JSON");
    }

    if (!$keywordSeen) {
        error_log("❌ No __remixContext found in any script tags");
    }

    return null;
}

// Example usage in a route handler: answers ?artist_name=<slug> with JSON.
header('Content-Type: application/json');

$artist_name = isset($_GET['artist_name']) ? $_GET['artist_name'] : null;

// Only a non-blank string is a usable artist name. This also rejects array
// input such as ?artist_name[]=x, which would otherwise reach string
// interpolation, and avoids spending a scrape on an empty name.
if (is_string($artist_name) && trim($artist_name) !== '') {
    $artist_name = trim($artist_name);
    $result = getArtistInfo($artist_name);
    $output = json_encode($result, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
} else {
    $output = json_encode([
        'success' => false,
        'message' => 'Please provide artist_name parameter'
    ], JSON_PRETTY_PRINT);
}

// json_encode() returns false when the payload cannot be encoded (for example
// invalid UTF-8 in scraped text). Send an explicit error instead of an empty body.
if ($output === false) {
    $output = json_encode([
        'success' => false,
        'message' => 'Error encoding response',
        'error' => json_last_error_msg()
    ], JSON_PRETTY_PRINT);
}

echo $output;

?>