Add new URL scraping data and create tab data lists
This commit is contained in:
@@ -5,6 +5,7 @@ namespace App\Services;
|
||||
use App\Models\GlobalSetting;
|
||||
use App\Models\PbgTask;
|
||||
use App\Models\PbgTaskDetail;
|
||||
use App\Models\PbgTaskDetailDataList;
|
||||
use App\Models\PbgTaskIndexIntegrations;
|
||||
use App\Models\PbgTaskPrasarana;
|
||||
use App\Models\PbgTaskRetributions;
|
||||
@@ -36,44 +37,119 @@ class ServiceTabPbgTask
|
||||
$this->user_refresh_token = $auth_data['refresh'];
|
||||
}
|
||||
|
||||
/**
 * Synchronise every PBG task by scraping all of its endpoints.
 *
 * Tasks are walked in ascending id order and loaded chunk by chunk instead
 * of all at once. Each task is processed independently: a failure that
 * isCriticalError() does not classify as critical is logged and the task is
 * skipped, while a critical failure aborts the whole run.
 *
 * @param string|null $retry_uuid UUID of the task to resume from. When it
 *                                matches a stored task, only tasks whose id
 *                                is greater than or equal to that task's id
 *                                are processed; an unknown UUID is ignored.
 * @param int         $chunk_size Number of tasks loaded per chunk; values
 *                                below 1 are treated as 1.
 *
 * @return void
 *
 * @throws \Exception When a task fails with a critical error, or when the
 *                    queries themselves fail. The error is logged and rethrown.
 */
public function run_service($retry_uuid = null, $chunk_size = 50)
{
    // Normalise once so the strict comparison below works for numeric strings
    // and a non-positive size cannot silently turn the run into a no-op.
    $chunk_size = max(1, (int) $chunk_size);

    try {
        $query = PbgTask::orderBy('id');

        // If retry_uuid is provided, start from that UUID
        if ($retry_uuid) {
            $retryTask = PbgTask::where('uuid', $retry_uuid)->first();
            if ($retryTask) {
                $query->where('id', '>=', $retryTask->id);
                Log::info("Resuming sync from UUID: {$retry_uuid} (ID: {$retryTask->id})");
            }
        }

        $totalTasks = $query->count();
        $processedCount = 0;

        Log::info("Starting sync for {$totalTasks} PBG Tasks with chunk size: {$chunk_size}");

        // Process in chunks to reduce memory usage.
        // $chunk_size must be captured explicitly: PHP closures do not see
        // the enclosing scope, and without it the rate-limit pause never runs.
        $query->chunk($chunk_size, function ($pbg_tasks) use (&$processedCount, $totalTasks, $chunk_size) {
            $chunkStartTime = now();

            foreach ($pbg_tasks as $pbg_task) {
                try {
                    // Remember the task in flight so getFailedUUID() can report it.
                    $this->current_uuid = $pbg_task->uuid;
                    $taskStartTime = now();

                    // Process all endpoints for this task
                    $this->processTaskEndpoints($pbg_task->uuid);

                    $processedCount++;
                    // Diff from the earlier instant to "now" so the value is
                    // positive on Carbon versions that return signed diffs.
                    $taskTime = round((float) $taskStartTime->diffInSeconds(now()), 2);

                    // Log progress every 10 tasks
                    if ($processedCount % 10 === 0) {
                        $progress = round(($processedCount / $totalTasks) * 100, 2);
                        Log::info("Progress: {$processedCount}/{$totalTasks} ({$progress}%) - Last task took {$taskTime}s");
                    }
                } catch (\Exception $e) {
                    Log::error("Failed on UUID: {$this->current_uuid}, Error: " . $e->getMessage());

                    // Check if this is a critical error that should stop the process
                    if ($this->isCriticalError($e)) {
                        throw $e;
                    }

                    // For non-critical errors, log and move on to the next task
                    Log::warning("Skipping UUID {$this->current_uuid} due to non-critical error");
                }
            }

            $chunkTime = round((float) $chunkStartTime->diffInSeconds(now()), 2);
            Log::info("Processed chunk of {$pbg_tasks->count()} tasks in {$chunkTime} seconds");

            // Small delay between chunks to prevent API rate limiting.
            // A short chunk means it was the last one, so no pause is needed.
            if ($pbg_tasks->count() === $chunk_size) {
                sleep(1);
            }
        });

        Log::info("Successfully completed sync for {$processedCount} PBG Tasks");
    } catch (\Exception $e) {
        Log::error("Failed to synchronize: " . $e->getMessage());
        throw $e;
    }
}
|
||||
|
||||
/**
 * Run every enabled scraper for one task, in a fixed order:
 * details, data list, retributions, then integrations.
 *
 * Nothing is caught here, so an exception raised by any scraper stops the
 * remaining ones and propagates to the caller.
 *
 * @param string $uuid UUID of the PBG task to scrape.
 *
 * @return void
 */
private function processTaskEndpoints(string $uuid): void
{
    $this->scraping_task_details($uuid);
    $this->scraping_pbg_data_list($uuid);

    // Assignment scraping is currently switched off:
    // $this->scraping_task_assignments($uuid);

    $this->scraping_task_retributions($uuid);
    $this->scraping_task_integrations($uuid);
}
|
||||
|
||||
/**
 * Decide whether an error should abort the whole sync instead of only
 * skipping the current task.
 *
 * Classification is based purely on the exception message. Matching is
 * case-insensitive so that variants such as "Database connection ..." and
 * "database connection ..." are treated the same.
 *
 * @param \Exception $e The exception raised while processing a task.
 *
 * @return bool True when the message indicates an authentication failure,
 *              a refused connection, or a database connection problem.
 */
private function isCriticalError(\Exception $e): bool
{
    $message = $e->getMessage();

    // Critical authentication errors
    if (stripos($message, 'Token refresh and login failed') !== false) {
        return true;
    }

    // Critical system errors
    if (stripos($message, 'Connection refused') !== false) {
        return true;
    }

    // Database connection errors: both words must appear somewhere in the message
    return stripos($message, 'database') !== false
        && stripos($message, 'connection') !== false;
}
|
||||
|
||||
/**
 * Return the UUID stored in $this->current_uuid.
 *
 * run_service() writes the task's UUID there before processing each task,
 * so after a failed run this is the task that was in flight.
 *
 * @return mixed The stored UUID; the value before any task has been
 *               processed is not visible here.
 */
public function getFailedUUID()
{
    return $this->current_uuid;
}
|
||||
|
||||
private function scraping_task_details($uuid)
|
||||
public function scraping_task_details($uuid)
|
||||
{
|
||||
$url = "{$this->simbg_host}/api/pbg/v1/detail/{$uuid}/";
|
||||
$options = [
|
||||
@@ -144,7 +220,7 @@ class ServiceTabPbgTask
|
||||
throw new \Exception("Failed to fetch task details for UUID {$uuid} after retries.");
|
||||
}
|
||||
|
||||
private function scraping_task_assignments($uuid)
|
||||
public function scraping_task_assignments($uuid)
|
||||
{
|
||||
$url = "{$this->simbg_host}/api/pbg/v1/list-tim-penilai/{$uuid}/?page=1&size=10";
|
||||
$options = [
|
||||
@@ -237,7 +313,173 @@ class ServiceTabPbgTask
|
||||
throw new \Exception("Failed to fetch task assignments for UUID {$uuid} after retries.");
|
||||
}
|
||||
|
||||
private function scraping_task_retributions($uuid)
|
||||
/**
 * Fetch the data list of one PBG task from the SIMBG API and persist it.
 *
 * Server and connection errors are retried up to three times with an
 * exponentially growing delay (1s, 2s, 4s). A 401 response triggers a single
 * token refresh followed by an immediate retry that does not count against
 * the retry budget.
 *
 * @param string $uuid UUID of the PBG task whose data list is requested.
 *
 * @return array|bool The decoded response on success; true when the response
 *                    carries no usable `data` array; false when the request
 *                    failed with an error that is not retried.
 *
 * @throws \Exception When every attempt ended in a server or connection error.
 */
public function scraping_pbg_data_list($uuid)
{
    $url = "{$this->simbg_host}/api/pbg/v1/detail/{$uuid}/list-data/?sort=DESC";
    $options = [
        'headers' => [
            'Authorization' => "Bearer {$this->user_token}",
            'Content-Type' => 'application/json'
        ]
    ];

    $maxRetries = 3;
    $initialDelay = 1;
    $retriedAfter401 = false;
    $retryCount = 0;

    while ($retryCount < $maxRetries) {
        try {
            $response = $this->client->get($url, $options);
            $responseData = json_decode($response->getBody()->getContents(), true, 512, JSON_THROW_ON_ERROR);

            if (empty($responseData['data']) || !is_array($responseData['data'])) {
                Log::info("No data list found for UUID: {$uuid}");
                return true;
            }

            $data = $responseData['data'];

            Log::info("Processing data list for UUID: {$uuid}, found " . count($data) . " items");

            // Process each data list item and save to database
            $this->processDataListItems($data, $uuid);

            return $responseData;
        } catch (\GuzzleHttp\Exception\ClientException $e) {
            if ($e->getCode() === 401 && !$retriedAfter401) {
                Log::warning("401 Unauthorized - Refreshing token and retrying...");
                try {
                    $this->refreshToken();
                    $options['headers']['Authorization'] = "Bearer {$this->user_token}";
                    $retriedAfter401 = true;
                    // Retry straight away; $retryCount is deliberately left
                    // untouched so the refresh does not use up an attempt.
                    // $retriedAfter401 guarantees this happens at most once.
                    continue;
                } catch (\Exception $refreshError) {
                    Log::error("Token refresh and login failed: " . $refreshError->getMessage());
                    return false;
                }
            }

            // Any other 4xx (or a second 401) is not retried; log it so the
            // failure is not silent.
            Log::error("Client error ({$e->getCode()}) while fetching data list for UUID {$uuid}: " . $e->getMessage());
            return false;
        } catch (\GuzzleHttp\Exception\ServerException | \GuzzleHttp\Exception\ConnectException $e) {
            if ($e->getCode() === 502) {
                Log::warning("502 Bad Gateway - Retrying in {$initialDelay} seconds...");
            } else {
                Log::error("Network error ({$e->getCode()}) - Retrying in {$initialDelay} seconds...");
            }

            sleep($initialDelay);
            $initialDelay *= 2;
            $retryCount++;
        } catch (\GuzzleHttp\Exception\RequestException $e) {
            Log::error("Request error ({$e->getCode()}): " . $e->getMessage());
            return false;
        } catch (\JsonException $e) {
            Log::error("JSON decoding error: " . $e->getMessage());
            return false;
        } catch (\Throwable $e) {
            // NOTE(review): this also catches exceptions rethrown by
            // processDataListItems(), so persistence failures surface to the
            // caller as `false` rather than as an exception - confirm that
            // this is intended.
            Log::critical("Unhandled error: " . $e->getMessage(), ['trace' => $e->getTraceAsString()]);
            return false;
        }
    }

    Log::error("Failed to fetch task data list for UUID {$uuid} after {$maxRetries} retries.");
    throw new \Exception("Failed to fetch task data list for UUID {$uuid} after retries.");
}
|
||||
|
||||
/**
 * Validate data list items and persist them with a single bulk upsert.
 *
 * Items without a usable `uid` are skipped with a warning. When the same
 * `uid` appears more than once in the payload only the last occurrence is
 * kept, so the upsert never receives two rows for the same conflict key.
 *
 * @param array  $dataListItems Raw items from the API response; each item is
 *                              expected to be an array carrying at least `uid`.
 * @param string $pbgTaskUuid   UUID of the PBG task the items belong to.
 *
 * @return void
 *
 * @throws \Exception When the upsert (or anything else in here) fails; the
 *                    error is logged and rethrown.
 */
private function processDataListItems(array $dataListItems, string $pbgTaskUuid): void
{
    try {
        if (empty($dataListItems)) {
            return;
        }

        // One timestamp for the whole batch keeps the rows consistent and
        // avoids creating a new date object per item.
        $now = now();
        $batchData = [];

        foreach ($dataListItems as $item) {
            // Validate required fields
            if (empty($item['uid']) || !is_scalar($item['uid'])) {
                Log::warning("Skipping data list item with missing UID for PBG Task: {$pbgTaskUuid}");
                continue;
            }

            // Parse created_at if exists
            $createdAt = null;
            if (!empty($item['created_at'])) {
                try {
                    $createdAt = Carbon::parse($item['created_at'])->format('Y-m-d H:i:s');
                } catch (\Exception $e) {
                    Log::warning("Invalid created_at format for data list UID: {$item['uid']}, Error: " . $e->getMessage());
                }
            }

            // Keyed by uid: a repeated uid overwrites the earlier row.
            $batchData[(string) $item['uid']] = [
                'uid' => $item['uid'],
                'name' => $item['name'] ?? null,
                'description' => $item['description'] ?? null,
                'status' => $item['status'] ?? null,
                'status_name' => $item['status_name'] ?? null,
                'data_type' => $item['data_type'] ?? null,
                'data_type_name' => $item['data_type_name'] ?? null,
                'file' => $item['file'] ?? null,
                'note' => $item['note'] ?? null,
                'pbg_task_uuid' => $pbgTaskUuid,
                // Fall back to the batch timestamp when the API value is missing or unparsable.
                'created_at' => $createdAt ?? $now,
                'updated_at' => $now,
            ];
        }

        if (!empty($batchData)) {
            $rows = array_values($batchData);

            // Use upsert for bulk insert/update operations
            PbgTaskDetailDataList::upsert(
                $rows,
                ['uid'], // Unique columns
                [
                    'name', 'description', 'status', 'status_name',
                    'data_type', 'data_type_name', 'file', 'note',
                    'pbg_task_uuid', 'updated_at'
                ] // Columns to update; created_at is left as stored on existing rows
            );

            $validItems = count($rows);
            Log::info("Successfully bulk processed {$validItems} data list items for PBG Task: {$pbgTaskUuid}");
        }
    } catch (\Exception $e) {
        Log::error("Error bulk processing data list items for PBG Task {$pbgTaskUuid}: " . $e->getMessage());
        throw $e;
    }
}
|
||||
|
||||
/**
 * Persist data list items by delegating to PbgTask::syncDataLists().
 *
 * Alternative to the bulk-upsert path: the task is looked up by UUID and the
 * raw items are handed to the model. If no task matches the UUID the problem
 * is logged and nothing is written.
 *
 * @param array  $dataListItems Raw items to hand over to the model.
 * @param string $pbgTaskUuid   UUID used to look up the owning PBG task.
 *
 * @return void
 *
 * @throws \Exception When the lookup or the sync fails; logged and rethrown.
 */
private function processDataListItemsWithModel(array $dataListItems, string $pbgTaskUuid): void
{
    try {
        $pbgTask = PbgTask::where('uuid', $pbgTaskUuid)->first();

        if ($pbgTask === null) {
            Log::error("PBG Task not found with UUID: {$pbgTaskUuid}");

            return;
        }

        // The model owns the actual persistence logic.
        $pbgTask->syncDataLists($dataListItems);

        $processedCount = count($dataListItems);
        Log::info("Successfully synced {$processedCount} data list items for PBG Task: {$pbgTaskUuid} using model method");
    } catch (\Exception $e) {
        Log::error("Error syncing data list items for PBG Task {$pbgTaskUuid}: " . $e->getMessage());

        throw $e;
    }
}
|
||||
|
||||
public function scraping_task_retributions($uuid)
|
||||
{
|
||||
$url = "{$this->simbg_host}/api/pbg/v1/detail/" . $uuid . "/retribution/submit/";
|
||||
$options = [
|
||||
@@ -354,7 +596,7 @@ class ServiceTabPbgTask
|
||||
throw new \Exception("Failed to fetch task retributions for UUID {$uuid} after retries.");
|
||||
}
|
||||
|
||||
private function scraping_task_integrations($uuid){
|
||||
public function scraping_task_integrations($uuid){
|
||||
$url = "{$this->simbg_host}/api/pbg/v1/detail/" . $uuid . "/retribution/indeks-terintegrasi/";
|
||||
$options = [
|
||||
'headers' => [
|
||||
|
||||
Reference in New Issue
Block a user