queue = 'scraping'; }

    /**
     * Execute the scraping job.
     *
     * Steps, in order:
     *  1. Scrape Google Sheet data (ServiceGoogleSheet::run_service).
     *  2. Scrape PBG Task parent data (ServicePbgTask::run_service).
     *  3. Walk every PbgTask in chunks and scrape its details through
     *     ServiceTabPbgTask.
     *  4. Generate the BigData resume.
     *
     * Progress and the final outcome are written to an ImportDatasource record.
     * A failure on a single task is logged and skipped unless isCriticalError()
     * flags it; any other failure marks the record as failed and fails the job.
     *
     * @return void
     */
    public function handle()
    {
        $import_datasource = null;
        $failed_uuid = null;
        $processedTasks = 0;
        $totalTasks = 0;

        try {
            Log::info("=== SCRAPING DATA JOB STARTED ===");

            // Initialize services
            $service_google_sheet = app(ServiceGoogleSheet::class);
            $service_pbg_task = app(ServicePbgTask::class);
            $service_tab_pbg_task = app(ServiceTabPbgTask::class);

            // Create the ImportDatasource record that tracks this run
            $import_datasource = ImportDatasource::create([
                'message' => 'Starting optimized scraping process...',
                'response_body' => null,
                'status' => 'processing',
                'start_time' => now(),
                'failed_uuid' => null,
            ]);

            Log::info("ImportDatasource created", ['id' => $import_datasource->id]);

            // STEP 1: Scrape Google Sheet data first
            Log::info("=== STEP 1: SCRAPING GOOGLE SHEET ===");
            $import_datasource->update(['message' => 'Scraping Google Sheet data...']);
            $service_google_sheet->run_service();
            Log::info("Google Sheet scraping completed successfully");

            // STEP 2: Scrape PBG Task to get parent data
            Log::info("=== STEP 2: SCRAPING PBG TASK PARENT DATA ===");
            $import_datasource->update(['message' => 'Scraping PBG Task parent data...']);
            $service_pbg_task->run_service();
            Log::info("PBG Task parent data scraping completed");

            // STEP 3: Get all PBG tasks for detail scraping
            $totalTasks = PbgTask::count();
            Log::info("=== STEP 3: SCRAPING PBG TASK DETAILS ===", [
                'total_tasks' => $totalTasks,
            ]);
            $import_datasource->update([
                'message' => "Scraping details for {$totalTasks} PBG tasks...",
            ]);

            // Process tasks in chunks for memory efficiency
            $chunkSize = 100;

            PbgTask::orderBy('id')->chunk($chunkSize, function ($pbg_tasks) use (
                $service_tab_pbg_task,
                &$processedTasks,
                $totalTasks,
                $import_datasource,
                &$failed_uuid
            ) {
                foreach ($pbg_tasks as $pbg_task) {
                    try {
                        // Scrape all details for this task
                        $this->processTaskDetails($service_tab_pbg_task, $pbg_task->uuid);
                        $processedTasks++;

                        // Update progress every 10 tasks
                        if ($processedTasks % 10 === 0) {
                            // $totalTasks was counted before the loop; guard against a
                            // zero count so the percentage can never divide by zero.
                            $progress = $totalTasks > 0
                                ? round(($processedTasks / $totalTasks) * 100, 2)
                                : 0.0;

                            Log::info("Progress update", [
                                'processed' => $processedTasks,
                                'total' => $totalTasks,
                                'progress' => "{$progress}%",
                            ]);

                            $import_datasource->update([
                                'message' => "Processing details: {$processedTasks}/{$totalTasks} ({$progress}%)",
                            ]);
                        }
                    } catch (\Exception $e) {
                        Log::warning("Failed to process task details", [
                            'uuid' => $pbg_task->uuid,
                            'error' => $e->getMessage(),
                        ]);

                        // Store failed UUID but continue processing
                        // (only the most recent failure is kept).
                        $failed_uuid = $pbg_task->uuid;

                        // Only stop if it's a critical error
                        if ($this->isCriticalError($e)) {
                            throw $e;
                        }
                    }
                }
            });

            Log::info("Task details scraping completed", [
                'processed_tasks' => $processedTasks,
                'total_tasks' => $totalTasks,
            ]);

            // STEP 4: Generate BigData Resume
            Log::info("=== STEP 4: GENERATING BIGDATA RESUME ===");
            $import_datasource->update(['message' => 'Generating BigData resume...']);

            $data_setting_result = $service_google_sheet->get_big_resume_data();
            BigdataResume::generateResumeData($import_datasource->id, "simbg", $data_setting_result);

            Log::info("BigData resume generated successfully");

            // Update final status
            $import_datasource->update([
                'status' => 'success',
                'message' => "Scraping completed successfully. Processed {$processedTasks}/{$totalTasks} tasks.",
                'finish_time' => now(),
                'failed_uuid' => $failed_uuid, // Store last failed UUID if any
            ]);

            Log::info("=== SCRAPING DATA JOB COMPLETED SUCCESSFULLY ===", [
                'import_datasource_id' => $import_datasource->id,
                'processed_tasks' => $processedTasks,
                'total_tasks' => $totalTasks,
                'has_failures' => !is_null($failed_uuid),
            ]);
        } catch (\Throwable $e) {
            // \Throwable (not just \Exception) so that \Error subclasses such as
            // TypeError are also recorded instead of leaving the record in "processing".
            Log::error('=== SCRAPING DATA JOB FAILED ===', [
                'error' => $e->getMessage(),
                'file' => $e->getFile(),
                'line' => $e->getLine(),
                'processed_tasks' => $processedTasks,
                'total_tasks' => $totalTasks,
                'failed_uuid' => $failed_uuid,
                'trace' => $e->getTraceAsString(),
            ]);

            // Update ImportDatasource with failure info
            if ($import_datasource) {
                try {
                    $import_datasource->update([
                        'status' => 'failed',
                        'message' => "Scraping failed: {$e->getMessage()}. Processed {$processedTasks}/{$totalTasks} tasks.",
                        'response_body' => 'Scraping process interrupted due to error',
                        'finish_time' => now(),
                        'failed_uuid' => $failed_uuid,
                    ]);
                } catch (\Throwable $updateError) {
                    // The status update itself can fail (e.g. the database is the
                    // thing that broke). Log it and still fail the job below with
                    // the original error.
                    Log::error('Failed to record scraping failure on ImportDatasource', [
                        'import_datasource_id' => $import_datasource->id,
                        'error' => $updateError->getMessage(),
                    ]);
                }
            }

            // Don't retry this job
            $this->fail($e);
        }
    }

    /**
     * Process all detail endpoints for a single PBG task.
     *
     * Calls the four detail scrapers of the given service one after another
     * for the same UUID. Anything they throw propagates to the caller.
     *
     * @param ServiceTabPbgTask $service Service exposing the detail scrapers.
     * @param string            $uuid    UUID of the PBG task to scrape.
     *
     * @return void
     */
    private function processTaskDetails(ServiceTabPbgTask $service, string $uuid): void
    {
        // Call all detail scraping methods for this task
        $service->scraping_task_details($uuid);
        $service->scraping_pbg_data_list($uuid);
        $service->scraping_task_retributions($uuid);
        $service->scraping_task_integrations($uuid);
    }

    /**
     * Determine if an error is critical enough to stop the entire process.
     *
     * The check is a case-insensitive substring match of the exception message
     * against a fixed list of phrases.
     *
     * @param \Exception $e Exception raised while processing a task.
     *
     * @return bool True when the message contains one of the critical phrases.
     */
    private function isCriticalError(\Exception $e): bool
    {
        $criticalMessages = [
            'authentication failed',
            'token expired',
            'database connection',
            'memory exhausted',
            'maximum execution time',
        ];

        // Lower-case once so the comparison below is case-insensitive.
        $errorMessage = strtolower($e->getMessage());

        foreach ($criticalMessages as $critical) {
            if (strpos($errorMessage, $critical) !== false) {
                return true;
            }
        }

        return false;
    }
}