add new url scraping data and create tab data lists

This commit is contained in:
arifal
2025-08-15 17:25:20 +07:00
parent 6896fd62a3
commit 209ef07f9c
18 changed files with 1220 additions and 424 deletions

View File

@@ -5,6 +5,7 @@ namespace App\Services;
use App\Models\GlobalSetting;
use App\Models\PbgTask;
use App\Models\PbgTaskDetail;
use App\Models\PbgTaskDetailDataList;
use App\Models\PbgTaskIndexIntegrations;
use App\Models\PbgTaskPrasarana;
use App\Models\PbgTaskRetributions;
@@ -36,44 +37,119 @@ class ServiceTabPbgTask
$this->user_refresh_token = $auth_data['refresh'];
}
/**
 * Synchronise PBG tasks with the remote API, processing them in ID order.
 *
 * Tasks are read in chunks so the whole table is never held in memory.
 * A failure on a single task is logged and skipped unless isCriticalError()
 * classifies it as critical, in which case it is rethrown and the run stops;
 * getFailedUUID() then reports the task that was being processed.
 *
 * @param string|null $retry_uuid When given and found, the run resumes at
 *                                that task (inclusive) instead of the start.
 * @param int         $chunk_size Number of tasks loaded per chunk.
 *
 * @return void
 *
 * @throws \Exception Rethrows critical per-task errors and any error raised
 *                    outside the per-task handling.
 */
public function run_service($retry_uuid = null, $chunk_size = 50)
{
    // A non-positive page size makes no sense for chunk(); fall back to 1.
    $chunk_size = max(1, (int) $chunk_size);

    try {
        $query = PbgTask::orderBy('id');

        // If retry_uuid is provided, start from that UUID
        if ($retry_uuid) {
            $retryTask = PbgTask::where('uuid', $retry_uuid)->first();
            if ($retryTask) {
                $query->where('id', '>=', $retryTask->id);
                Log::info("Resuming sync from UUID: {$retry_uuid} (ID: {$retryTask->id})");
            } else {
                // Make the fallback visible instead of silently restarting.
                Log::warning("Retry UUID {$retry_uuid} not found - syncing all PBG Tasks from the beginning");
            }
        }

        $totalTasks = $query->count();
        $processedCount = 0;
        Log::info("Starting sync for {$totalTasks} PBG Tasks with chunk size: {$chunk_size}");

        // Process in chunks to reduce memory usage.
        // $chunk_size must be captured explicitly: closures do not inherit
        // local variables, and without it the rate-limit pause below never ran.
        $query->chunk($chunk_size, function ($pbg_tasks) use (&$processedCount, $totalTasks, $chunk_size) {
            $chunkStartTime = now();

            foreach ($pbg_tasks as $pbg_task) {
                try {
                    $this->current_uuid = $pbg_task->uuid;
                    $taskStartTime = now();

                    // Process all endpoints for this task
                    $this->processTaskEndpoints($pbg_task->uuid);
                    $processedCount++;

                    // Log progress every 10 tasks
                    if ($processedCount % 10 === 0) {
                        // Measure from the start instant so the value is
                        // positive on both Carbon 2 and Carbon 3.
                        $taskTime = round($taskStartTime->diffInSeconds(now()), 2);
                        $progress = round(($processedCount / $totalTasks) * 100, 2);
                        Log::info("Progress: {$processedCount}/{$totalTasks} ({$progress}%) - Last task took {$taskTime}s");
                    }
                } catch (\Exception $e) {
                    Log::error("Failed on UUID: {$this->current_uuid}, Error: " . $e->getMessage());

                    // Check if this is a critical error that should stop the process
                    if ($this->isCriticalError($e)) {
                        throw $e;
                    }

                    // For non-critical errors, log and move on to the next task
                    Log::warning("Skipping UUID {$this->current_uuid} due to non-critical error");
                }
            }

            $chunkTime = round($chunkStartTime->diffInSeconds(now()), 2);
            Log::info("Processed chunk of {$pbg_tasks->count()} tasks in {$chunkTime} seconds");

            // Small delay between chunks to prevent API rate limiting.
            // A short chunk is the last one, so no pause is needed after it.
            if ($pbg_tasks->count() === $chunk_size) {
                sleep(1);
            }
        });

        Log::info("Successfully completed sync for {$processedCount} PBG Tasks");
    } catch (\Exception $e) {
        Log::error("Failed to synchronize: " . $e->getMessage());
        throw $e;
    }
}
/**
 * Run every scraping step for one PBG task, in a fixed order:
 * details, data list, retributions, then integrations.
 *
 * The return values of the individual steps are not inspected here; only an
 * exception thrown by a step interrupts the sequence and reaches the caller.
 *
 * @param string $uuid UUID of the PBG task to scrape.
 */
private function processTaskEndpoints(string $uuid): void
{
    // 1. Task detail record
    $this->scraping_task_details($uuid);

    // 2. Data list entries attached to the task
    $this->scraping_pbg_data_list($uuid);

    // Assignments are currently left out of the sync:
    // $this->scraping_task_assignments($uuid);

    // 3. Retribution data
    $this->scraping_task_retributions($uuid);

    // 4. Integrated index data
    $this->scraping_task_integrations($uuid);
}
/**
 * Decide whether an error must abort the whole sync run.
 *
 * The decision is based purely on the exception message. Matching is
 * case-insensitive so that messages such as
 * "Database connection [mysql] not configured." are recognised as well as
 * lower-case driver messages.
 *
 * @param \Exception $e The error raised while processing a task.
 *
 * @return bool True when the run should stop, false when the task can be skipped.
 */
private function isCriticalError(\Exception $e): bool
{
    $message = $e->getMessage();

    // Authentication can no longer be recovered, or the remote host is unreachable.
    $criticalFragments = [
        'Token refresh and login failed',
        'Connection refused',
    ];

    foreach ($criticalFragments as $fragment) {
        if (stripos($message, $fragment) !== false) {
            return true;
        }
    }

    // Database connection errors: both words must appear somewhere in the message.
    return stripos($message, 'database') !== false
        && stripos($message, 'connection') !== false;
}
/**
 * Expose the UUID stored in $current_uuid.
 *
 * run_service() assigns this property before it processes each task, so
 * after an aborted run it identifies the task that was in progress.
 *
 * @return mixed The stored UUID (whatever $current_uuid currently holds).
 */
public function getFailedUUID()
{
    return $this->current_uuid;
}
private function scraping_task_details($uuid)
public function scraping_task_details($uuid)
{
$url = "{$this->simbg_host}/api/pbg/v1/detail/{$uuid}/";
$options = [
@@ -144,7 +220,7 @@ class ServiceTabPbgTask
throw new \Exception("Failed to fetch task details for UUID {$uuid} after retries.");
}
private function scraping_task_assignments($uuid)
public function scraping_task_assignments($uuid)
{
$url = "{$this->simbg_host}/api/pbg/v1/list-tim-penilai/{$uuid}/?page=1&size=10";
$options = [
@@ -237,7 +313,173 @@ class ServiceTabPbgTask
throw new \Exception("Failed to fetch task assignments for UUID {$uuid} after retries.");
}
private function scraping_task_retributions($uuid)
/**
 * Fetch the "list-data" entries of one PBG task and store them.
 *
 * Server and connection errors are retried with exponential back-off, up to
 * three attempts. A 401 response triggers one token refresh, which does not
 * count as an attempt. Any other failure ends the call with false. Errors
 * raised while storing the items are handled by the generic catch and also
 * result in false.
 *
 * @param string $uuid UUID of the PBG task.
 *
 * @return array|bool The decoded response on success, true when the response
 *                    holds no data list, false when the request was given up.
 *
 * @throws \Exception When every attempt ended in a server or connection error.
 */
public function scraping_pbg_data_list($uuid){
    $url = "{$this->simbg_host}/api/pbg/v1/detail/{$uuid}/list-data/?sort=DESC";
    $options = [
        'headers' => [
            'Authorization' => "Bearer {$this->user_token}",
            'Content-Type' => 'application/json'
        ]
    ];

    $maxRetries = 3;
    $initialDelay = 1;
    $retriedAfter401 = false;

    for ($retryCount = 0; $retryCount < $maxRetries; $retryCount++) {
        try {
            $response = $this->client->get($url, $options);
            $responseData = json_decode($response->getBody()->getContents(), true, 512, JSON_THROW_ON_ERROR);

            if (empty($responseData['data']) || !is_array($responseData['data'])) {
                Log::info("No data list found for UUID: {$uuid}");
                return true;
            }

            $data = $responseData['data'];
            Log::info("Processing data list for UUID: {$uuid}, found " . count($data) . " items");

            // Process each data list item and save to database
            $this->processDataListItems($data, $uuid);

            return $responseData;
        } catch (\GuzzleHttp\Exception\ClientException $e) {
            if ($e->getCode() === 401 && !$retriedAfter401) {
                Log::warning("401 Unauthorized - Refreshing token and retrying...");
                try {
                    $this->refreshToken();
                    $options['headers']['Authorization'] = "Bearer {$this->user_token}";
                    $retriedAfter401 = true;
                    // The refresh is not a failed fetch: hand the attempt back so a
                    // 401 on the last attempt is still retried with the new token.
                    // $retriedAfter401 guarantees this happens at most once.
                    $retryCount--;
                    continue;
                } catch (\Exception $refreshError) {
                    Log::error("Token refresh and login failed: " . $refreshError->getMessage());
                    return false;
                }
            }

            // Other 4xx responses (or a second 401) will not succeed on retry.
            Log::error("Client error ({$e->getCode()}) while fetching data list for UUID {$uuid}: " . $e->getMessage());
            return false;
        } catch (\GuzzleHttp\Exception\ServerException | \GuzzleHttp\Exception\ConnectException $e) {
            // No point in sleeping through a back-off when no attempt follows.
            if ($retryCount >= $maxRetries - 1) {
                Log::error("Network error ({$e->getCode()}) - No retries left for UUID {$uuid}");
                continue;
            }

            if ($e->getCode() === 502) {
                Log::warning("502 Bad Gateway - Retrying in {$initialDelay} seconds...");
            } else {
                Log::error("Network error ({$e->getCode()}) - Retrying in {$initialDelay} seconds...");
            }

            sleep($initialDelay);
            $initialDelay *= 2;
        } catch (\GuzzleHttp\Exception\RequestException $e) {
            Log::error("Request error ({$e->getCode()}): " . $e->getMessage());
            return false;
        } catch (\JsonException $e) {
            Log::error("JSON decoding error: " . $e->getMessage());
            return false;
        } catch (\Throwable $e) {
            Log::critical("Unhandled error: " . $e->getMessage(), ['trace' => $e->getTraceAsString()]);
            return false;
        }
    }

    Log::error("Failed to fetch task data list for UUID {$uuid} after {$maxRetries} retries.");
    throw new \Exception("Failed to fetch task data list for UUID {$uuid} after retries.");
}
/**
 * Store the data list items of one PBG task with a single bulk upsert.
 *
 * Rows are matched on "uid". Existing rows get every column refreshed except
 * "created_at"; new rows take the item's created_at when it can be parsed and
 * the current time otherwise. Items without a usable uid are skipped. When
 * the same uid occurs more than once, only the last occurrence is written,
 * because some databases reject a statement that touches one row twice.
 *
 * @param array  $dataListItems Raw items taken from the API response.
 * @param string $pbgTaskUuid   UUID of the PBG task the items belong to.
 *
 * @throws \Exception Logged and rethrown when building or writing the batch fails.
 */
private function processDataListItems(array $dataListItems, string $pbgTaskUuid): void
{
    if (empty($dataListItems)) {
        return;
    }

    try {
        // One timestamp for the whole batch keeps the rows consistent.
        $now = now();
        $rowsByUid = [];

        foreach ($dataListItems as $item) {
            // Validate required fields
            if (!is_array($item) || empty($item['uid']) || !is_scalar($item['uid'])) {
                Log::warning("Skipping data list item with missing UID for PBG Task: {$pbgTaskUuid}");
                continue;
            }

            // Parse created_at if exists
            $createdAt = null;
            if (!empty($item['created_at'])) {
                try {
                    $createdAt = Carbon::parse($item['created_at'])->format('Y-m-d H:i:s');
                } catch (\Exception $e) {
                    Log::warning("Invalid created_at format for data list UID: {$item['uid']}, Error: " . $e->getMessage());
                }
            }

            // Keyed by uid so a repeated uid replaces the earlier row.
            $rowsByUid[$item['uid']] = [
                'uid' => $item['uid'],
                'name' => $item['name'] ?? null,
                'description' => $item['description'] ?? null,
                'status' => $item['status'] ?? null,
                'status_name' => $item['status_name'] ?? null,
                'data_type' => $item['data_type'] ?? null,
                'data_type_name' => $item['data_type_name'] ?? null,
                'file' => $item['file'] ?? null,
                'note' => $item['note'] ?? null,
                'pbg_task_uuid' => $pbgTaskUuid,
                'created_at' => $createdAt ?? $now,
                'updated_at' => $now,
            ];
        }

        if (empty($rowsByUid)) {
            return;
        }

        $batchData = array_values($rowsByUid);
        $validItems = count($batchData);

        // Use upsert for bulk insert/update operations
        PbgTaskDetailDataList::upsert(
            $batchData,
            ['uid'], // Unique columns
            [
                'name', 'description', 'status', 'status_name',
                'data_type', 'data_type_name', 'file', 'note',
                'pbg_task_uuid', 'updated_at',
            ] // Columns to update
        );

        Log::info("Successfully bulk processed {$validItems} data list items for PBG Task: {$pbgTaskUuid}");
    } catch (\Exception $e) {
        Log::error("Error bulk processing data list items for PBG Task {$pbgTaskUuid}: " . $e->getMessage());
        throw $e;
    }
}
/**
 * Variant of the data list import that delegates the work to the PbgTask
 * model's syncDataLists() method.
 *
 * When no task with the given UUID exists, an error is logged and nothing is
 * written.
 *
 * @param array  $dataListItems Raw items taken from the API response.
 * @param string $pbgTaskUuid   UUID of the PBG task the items belong to.
 *
 * @throws \Exception Logged and rethrown when the lookup or the sync fails.
 */
private function processDataListItemsWithModel(array $dataListItems, string $pbgTaskUuid): void
{
    try {
        $task = PbgTask::where('uuid', $pbgTaskUuid)->first();

        if ($task) {
            // Hand the raw items to the model and report how many were passed in.
            $task->syncDataLists($dataListItems);

            $itemTotal = count($dataListItems);
            Log::info("Successfully synced {$itemTotal} data list items for PBG Task: {$pbgTaskUuid} using model method");

            return;
        }

        Log::error("PBG Task not found with UUID: {$pbgTaskUuid}");
    } catch (\Exception $syncError) {
        Log::error("Error syncing data list items for PBG Task {$pbgTaskUuid}: " . $syncError->getMessage());
        throw $syncError;
    }
}
public function scraping_task_retributions($uuid)
{
$url = "{$this->simbg_host}/api/pbg/v1/detail/" . $uuid . "/retribution/submit/";
$options = [
@@ -354,7 +596,7 @@ class ServiceTabPbgTask
throw new \Exception("Failed to fetch task retributions for UUID {$uuid} after retries.");
}
private function scraping_task_integrations($uuid){
public function scraping_task_integrations($uuid){
$url = "{$this->simbg_host}/api/pbg/v1/detail/" . $uuid . "/retribution/indeks-terintegrasi/";
$options = [
'headers' => [