#-------------------------------------------------------------------------------
#  ci.smartfix_extractDocs_unsorted_pages.pm:
#
#  Module to create N PDFs for the specified document pages
#
#  Call:
#
#  On Unix:    ./afp2web -q -c -doc_cold -sp:ci.smartfix_extractDocs_unsorted_pages.pm -sa:extractdoc_args.json samples/afpsample.afp
#
#  where
#  extractdoc_args.json must contain an array of document contexts, as shown below
#  [
#      { "pages":"1,2,3,4,5,6", "filename":"ci.smartfix_2017-06-23_11_111843-0000-0001" }
#    , { "pages":"7,8,9", "filename":"..." }
#  ]
#
#  Author    : Fa. Maas
#  Copyright : (C) 2017 by Maas Holding GmbH
#
#  $V100   2017-07-12    Initial Release
#
#  $V101   2017-08-11    Bug:
#                        Incorrect page size on extracted PDF documents
#
#                        Reason:
#                        During PDF to TIFF single page transformation, AFP2web overwrites
#                        TIFF resolution as output resolution.
#                        PDF contains TIFF page whose w=1666, h=2345 in 200 dpi
#                        AFP2web extracted TIFF page has w=1666, h=2345 in 300 dpi
#                        Due to this change of resolution, the output page size in extracted PDF
#                        reduced by 200/300.
#
#                        Fix:
#                        Store PDF page sizes in cache and use them when creating pages for document to be extracted
#
#                        JiRa, Customer:
#                        OTS-1747, Hallesche
#
#                        Fixed by:
#                        Panneer
#
#  $V102   2018-01-19    Bug:
#                        Extracting document failed with following error
#                        E088: Scripting Facility Error (rc=-1): Unable to delete file /mnt/transfer/tmp/oxs-smartfix-server/171211153203435-73215/2.tif rc=0
#                        reason=No such file or directory, rc=-88, ScriptArgs=[{"pages":"2,2","filename":"ci.smartfix_2017-12-10_99_000073-0000-0000"}] ...
#
#                        Reason:
#                        "pages" entry in script argument should have unique page id, but in given transformation page 2 is listed twice.
#                        Scripting facility cleans up extracted TIFF pages after adding them on output based on "pages" list. When same
#                        page id is listed more than once, deleting that page second time gives error (as it is deleted already)
#
#                        Fix:
#                        a. It does not make sense to abort a transformation when cleaning up things at the end of it when that transformation was successful.
#                           So, ignore cleanup errors
#                        b. Delete a file only if it exists
#
#                        JiRa, Customer:
#                        OTS-2033, Hallesche
#
#                        Fixed by:
#                        Panneer
#
#-------------------------------------------------------------------------------

#-----------------------------------------------------------------------
# BEGIN block of module
#
# Extends PERL module search path array (@INC) with the path of this
# script module and a fixed set of library sub paths below it, in order
# to have better module portability
#
# Resulting search order (first searched first):
#   <path>/../../../perl/site/lib, <path>/../../../perl/lib,
#   <path>/perl/site/lib, <path>/perl/lib, <path>/a2w, <path>,
#   <parent of "sfsamples"> (only when <path> contains "/sfsamples")
#-----------------------------------------------------------------------
BEGIN {
    #---- Extract script file path from script filename ($0), empty when $0 is not of the form <path>/<name>.pm
    my $sBasePathTmp = ( $0 =~ /(.*)\/.*\.pm/ ) ? $1 : "";

    #---- Collect the search paths in the order they have to be prepended
    my @arrNewPathsTmp = ();
    if ( $sBasePathTmp eq "" ){
        #---- No path found, fall back to current directory
        $sBasePathTmp = ".";
    }
    elsif ( $sBasePathTmp =~ /(.*)\/sfsamples/ && $1 ne "" ){
        #---- Script lives below "sfsamples", add the parent of that directory too
        push( @arrNewPathsTmp, $1 );
    }

    #---- Script file path itself followed by local perl lib paths relative to it
    push( @arrNewPathsTmp, $sBasePathTmp );
    foreach my $sSubPathTmp ( "/a2w", "/perl/lib", "/perl/site/lib", "/../../../perl/lib", "/../../../perl/site/lib" ){
        push( @arrNewPathsTmp, $sBasePathTmp . $sSubPathTmp );
    }

    #---- Prepending the reversed list gives the same order as prepending each entry one after the other
    unshift( @INC, reverse( @arrNewPathsTmp ) );
}

use a2w::Config;
use a2w::Document;
use a2w::Font;
use a2w::Index;
use a2w::Kernel;
use a2w::Page;
use a2w::Text;
use a2w::ConfigConstants;
use a2w::DocumentConstants;
use a2w::PageConstants;
use a2w::FontConstants;

use a2w::core::bo::BOPool;
use a2w::core::bo::Constants;
use a2w::core::file::Utils;
use a2w::core::path::Utils; # $V101 Change
use a2w::core::process::Logger;

#-----------------------------------------------------------------------
# Initialize once per process
#
# Parameters (positional):
#   1. a2w::Config instance
#   2. a2w::Kernel instance
#
# The script argument decides the mode of operation:
#   - "First input": the script argument names an existing file. The file is
#     read as extractdoc arguments JSON, the JSON content is stored back as
#     script argument and the configuration is set up to write one TIFF per
#     page (filename pattern <page id>) up to the highest requested page id.
#   - "Following input": the script argument is treated as the JSON content
#     itself. The configuration is set up for PDF/A output, the page info
#     cache files (<page id>.tif.<width>.<height>) found in the output path
#     are read into $hrefPageInfo and deleted afterwards.
#
# In both modes the first PDFDocLimits value is raised to the largest
# document page count, when it is lower than that.
#
# Returns 0 on success, else ( <rc>, <message> ) where <rc> is
#   -1: JSON file could not be opened or no page info cache files were found
#   -2: JSON could not be decoded or is not an array
#   -3: extractdoc arguments are invalid
#-----------------------------------------------------------------------
sub initialize(){

    #---- Get Parameter of initialize( Par: a2w::Config, a2w::Kernel )
    ( $a2wConfigPar, $a2wKernelPar ) = @_;

    #---- Define boolean values
    $TRUE  = 1;    # TRUE  boolean value
    $FALSE = 0;    # FALSE boolean value

    #---- Set/Reset Logging (turned on when logging level contains "sf")
    $bLog = $FALSE;
    if ( index( lc( $a2wConfigPar->getAttribute( $a2w::ConfigConstants::LOGGINGLEVEL ) ), "sf" ) >= 0 ){
        $bLog = $TRUE;
    }

    #Path Separator
    $PATH_SEP = "/";

    my $sScriptProcTmp = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::SCRIPTPROCEDURE );
    $sScriptArgs       = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::SCRIPTARGUMENT );
    $sOutputFilePath   = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::OUTPUTFILEPATH );
    $iOutputResolution = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::RESOLUTION );
    $sLogFilePath      = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::LOGPATH );
    $sPDFDocLimits     = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::PDFDOCLIMITS );

    $sSpoolFilename    = $a2wKernelPar->getSpoolFilename();

    #---- Get global BO pool
    $theBOPoolLocal = a2w::core::bo::BOPool::getBOPool();

    #---- Get logger
    $theLoggerLocal = $theBOPoolLocal->getLogger();

    if ( $bLog == $TRUE ){
        #---- List of modules to be logged
        my $sLogModuleListTmp = ""
                                . "main"
                                ;

        #---- Register this module to log
        $theLoggerLocal->registerClasses( $sLogModuleListTmp );

        $bLog = $theLoggerLocal->isRegistered( "main" );

        #---- Open logger
        if ( $sLogModuleListTmp ne "" ){
            $theLoggerLocal->open( $sLogFilePath, "ci.smartfix" );
        }
    }

    if ( $bLog == $TRUE ){
        $theLoggerLocal->logFunctionName( "main", "initialize" );
        $theLoggerLocal->logMessage( "Running " . $sScriptProcTmp . "..." );
        $theLoggerLocal->logMessage( "Processing " . $sSpoolFilename );
        $theLoggerLocal->logMessage( "Args="
                                     . $sScriptArgs
                                     . ", OutputFilePath: " . $sOutputFilePath
                                   );
    }

    #---- Page process flags
    $APPEND   = 0;    # append page to current document
    $SKIP     = 1;    # skip page
    $NEWDOC   = 2;    # new document
    $LASTPAGE = 4;    # last page of current document

    $PAGE_WIDTH      = 8.27;  # inches, A4 size (8.27 × 11.69) width
    $PAGE_HEIGHT     = 11.69; # inches, A4 size (8.27 × 11.69) height
    $PAGE_RESOLUTION = $iOutputResolution;

    #---- Load extractdoc arguments JSON
    # Script argument is a filename on first input and the JSON content on following input
    $sSAJSONFile = $sScriptArgs;
    $bFirstInput = (-e $sSAJSONFile) ? $TRUE : $FALSE;

    my $sExtractDocArgsTmp = $sScriptArgs;
    if ( $bFirstInput == $TRUE ){
        # Three argument open with lexical handle, so the filename is never interpreted as open mode
        my $fhJSONTmp = undef;
        my $iRcTmp = open( $fhJSONTmp, '<', $sSAJSONFile );
        if ( !$iRcTmp ){
            return ( -1, "Unable to open extractdoc arguments JSON file $sSAJSONFile. rc=$iRcTmp reason=" . $! );
        }
        $sExtractDocArgsTmp = do { local $/; <$fhJSONTmp> };
        close( $fhJSONTmp );
    }

    #---- Decode extractdoc arguments
    # NOTE: The messages below no longer carry "rc=", the open return code is not
    #       in scope here and was always printed empty
    $arefExtractDocArgs = undef;
    eval{
        require JSON::Tiny;
        $arefExtractDocArgs = JSON::Tiny::from_json( $sExtractDocArgsTmp );
    };
    if ( $@ ){ return ( -2, "Unable to parse extractdoc arguments JSON file $sSAJSONFile. reason=" . $@ ); }

    # Everything below dereferences the arguments as array, so assert it is one (covers JSON "null" too)
    if ( ref( $arefExtractDocArgs ) ne "ARRAY" ){
        return ( -2, "Unable to parse extractdoc arguments JSON file $sSAJSONFile. reason=Expected an array of document contexts" );
    }
    if ( $bLog == $TRUE ){
        $theLoggerLocal->logMessage( "ExtractDoc Arguments: $arefExtractDocArgs" );
        my @arrDocsTmp = @{ $arefExtractDocArgs };
        my $iIdxTmp = 0;
        my $sIdxTmp = 0;
        foreach my $e ( @arrDocsTmp ){
            $iIdxTmp++;
            $sIdxTmp = sprintf( "%04d", $iIdxTmp );
            if ( ref( $e ) eq "SCALAR" ){
                $theLoggerLocal->logMessage( "$sIdxTmp:>" . $e );
            }
            elsif ( ref( $e ) eq "ARRAY" ){
                my @arrTmp = @{ $e };
                $theLoggerLocal->logMessage( "$sIdxTmp:>arr(@arrTmp)<" );
            }
            elsif ( ref( $e ) eq "HASH" ){
                my @arrKeysTmp = sort keys %{ $e };
                foreach my $k ( @arrKeysTmp ){
                    $theLoggerLocal->logMessage( "$sIdxTmp:$k>" . $e->{ $k } . "<" );
                }
            }
        }
    }

    #---- Validate extractdoc arguments
    $iMaxPages = 0;
    $iFirstPageId = 0xFFFF;
    $iLastPageId = 0;

    #---- Page info hash (<page id>=> { 'W'=> width, 'H' => height})
    $hrefPageInfo = {}; # $V101 Change

    # NOTE: _processAndValidateArguments will update $iMaxPages and $arefExtractDocArgs values
    my ( $iRetTmp, $sMsgTmp ) = _processAndValidateArguments( $arefExtractDocArgs );
    if ( $iRetTmp < 0 ){ return ( -3, $sMsgTmp ); }
    if ( $bLog == $TRUE ){
        $theLoggerLocal->logMessage( "Lower page id: $iFirstPageId" );
        $theLoggerLocal->logMessage( "Upper page id: $iLastPageId" );
    }

    #---- Set options to split pages for first input
    if ( $bFirstInput == $TRUE ){
        #---- Set JSON content as script argument for following documents
        $a2wConfigPar->setAttribute( $a2w::ConfigConstants::SCRIPTARGUMENT, $sExtractDocArgsTmp );

        #---- Set output format as TIFF
        $sOutputFormat = "tif";
        $a2wConfigPar->setAttribute( $a2w::ConfigConstants::OUTPUTFORMAT, $sOutputFormat );

        #---- Turn on page output
        $a2wConfigPar->setAttribute( $a2w::ConfigConstants::PAGEOUTPUT, "on" );

        #---- Set filenamepattern as <page id>
        $a2wConfigPar->setAttribute( $a2w::ConfigConstants::FILENAMEPATTERN, "\"%d\",PAGEID" );

        #---- Set ending page to stop parsing after required pages
        $a2wConfigPar->setAttribute( $a2w::ConfigConstants::ENDINGPAGE, $iLastPageId );
    }
    else {
        #---- Set output format as PDF/A
        #!!! TODO: Ensure to get user set output format from first input and use the output format here !!!
        $sOutputFormat = "pdfa";
        $a2wConfigPar->setAttribute( $a2w::ConfigConstants::OUTPUTFORMAT, $sOutputFormat );

        #---- Set ending page as document count to stop parsing after generating required output documents
        my @arrDocsTmp = @{ $arefExtractDocArgs };
        my $iDocCountTmp = @arrDocsTmp;
        $a2wConfigPar->setAttribute( $a2w::ConfigConstants::ENDINGPAGE, $iDocCountTmp );

        #---- Reset filenamepattern
        $a2wConfigPar->setAttribute( $a2w::ConfigConstants::FILENAMEPATTERN, "\"%s_%s.%d\",SPOOLNAME,PID,DOCID" );

        #---- Set output path as object container path, so AFP2web core can find the TIFF pages
        $a2wConfigPar->setAttribute( $a2w::ConfigConstants::OBJECTCONTAINERPATH, $sOutputFilePath );

        # $V101 Begin
        #---- Process page info cache files ----#
        #---- Fetch page info files list from output file path
        my $rePgInfoFilterTmp = qr/\d+\.tif\.\d+\.\d+/;
        my @arrPgInfoFNsTmp = a2w::core::path::Utils::list( $sOutputFilePath, $TRUE, $rePgInfoFilterTmp );
        if ( @arrPgInfoFNsTmp <= 0 ){ return ( -1, "No page info cache files found in output path, when searched using pattern ($rePgInfoFilterTmp)" ); }

        #---- Collect page info from cache file name
        foreach my $fn ( @arrPgInfoFNsTmp ){
            if ( $fn =~ /(\d+)\.tif\.(\d+)\.(\d+)/ ){
                $hrefPageInfo->{ $1 }{ 'W' } = $2;
                $hrefPageInfo->{ $1 }{ 'H' } = $3;
            }

            #---- Clean cache file
            # NOTE(review): assumes list() returns names relative to the output path and
            #               that the output path ends with a path separator, confirm
            a2w::core::file::Utils::deleteFile( $sOutputFilePath . $fn );
            if ( $bLog == $TRUE ){ $theLoggerLocal->logMessage( "Deleted page info cache file $sOutputFilePath$fn" ); }
        }

        if ( $bLog == $TRUE ){
            my @arrKeysTmp = sort keys( %{ $hrefPageInfo } );
            foreach my $k ( @arrKeysTmp ){
                # Height is logged from 'H' (was logged from 'W' before)
                $theLoggerLocal->logMessage( "Page $k: W=>" . $hrefPageInfo->{ $k }{ 'W' } . "< H=>" . $hrefPageInfo->{ $k }{ 'H' } . "<" );
            }
        }
        # $V101 End
    }

    #---- Evaluate output extension (first three characters of output format)
    $sOutputExtension = "." . substr( lc( $sOutputFormat ), 0, 3 );

    #---- Initialize document info
    $iDocId = 0;
    $hrefDocument = undef;

    # Page usage hash (<page id> => TRUE when page is used by a document), all pages start as unused
    my @arrAllPagesTmp = ( 1 .. $iLastPageId );
    %hshPages = map { $_ => $FALSE } @arrAllPagesTmp;

    #---- Page count
    if ( $bLog == $TRUE ){
        $theLoggerLocal->logMessage( "PDFDocLimits=$sPDFDocLimits" );
        $theLoggerLocal->logMessage( "PageCount=$iMaxPages" );
    }

    #---- Process and update pdf document limits
    my @arrPDFDocLimitsTmp = ( $sPDFDocLimits );
    if ( index( $sPDFDocLimits, "," ) > 0 ){ # PDFDocLimits multi value format
        #---- Split and get pdf document limits
        @arrPDFDocLimitsTmp = split /,/, $sPDFDocLimits;
    }
    my $iPageLimitTmp = $arrPDFDocLimitsTmp[ 0 ];

    if ( $iPageLimitTmp < $iMaxPages ){
        #---- Modify page limit based on page count
        $arrPDFDocLimitsTmp[ 0 ] = $iMaxPages;
        $sPDFDocLimits = join( ",", @arrPDFDocLimitsTmp );

        if ( $bLog == $TRUE ){
            $theLoggerLocal->logMessage( "New PDFDocLimits=$sPDFDocLimits" );
        }
        $a2wConfigPar->setAttribute( $a2w::ConfigConstants::PDFDOCLIMITS, $sPDFDocLimits );
    }
    return 0;
}

#-----------------------------------------------------------------------
# InitializeDoc for each document
#
# Parameter:
#   1. a2w::Document instance, kept in $a2wDocumentPar for the other callbacks
#
# On following input, picks the next document context from the extractdoc
# arguments and sets the output filename of the document from it.
#
# Returns 0 always
#-----------------------------------------------------------------------
sub initializeDoc(){
    #---- Get Parameter of initializeDoc( Par: a2w::Document )
    $a2wDocumentPar = $_[ 0 ];

    if ( $bLog == $TRUE ){
        $theLoggerLocal->logFunctionName( "main", "initializeDoc" );
        $theLoggerLocal->logMessage( "Id=" . $a2wDocumentPar->getId() );
    }

    #---- Nothing more to do on first input
    if ( $bFirstInput != $FALSE ){ return 0; }

    #---- Set current document context and move on to next document id
    $hrefDocument = $arefExtractDocArgs->[ $iDocId++ ];

    #---- Set output filename (<filename argument><output extension>)
    my $sDocFilenameTmp = $hrefDocument->{ 'filename' };
    $a2wDocumentPar->setOutputFilename( $sDocFilenameTmp . $sOutputExtension );
    if ( $bLog == $TRUE ){
        $theLoggerLocal->logMessage( "Output filename: " . $sDocFilenameTmp );
    }

    return 0;
}

#-----------------------------------------------------------------------
# InitializePage for each page
#
# Parameter:
#   1. a2w::Page instance, kept in $a2wPagePar for the afp2web callback
#
# Returns 0 always
#-----------------------------------------------------------------------
sub initializePage(){
    #---- Get Parameter of initializePage( Par: a2w::Page )
    $a2wPagePar = $_[ 0 ];

    $theLoggerLocal->logFunctionName( "main", "initializePage" ) if ( $bLog == $TRUE );

    return 0;
}

#-----------------------------------------------------------------------
# Main entry method
#
# First input:
#   Writes an empty page info cache file named
#   <page id>.tif.<width in output resolution>.<height in output resolution>
#   into the output path and appends the page to the current document.
#
# Following input:
#   Creates one new page per page id of the current document context, places
#   the TIFF <page id>.tif on it using the cached page size and adds the page
#   to the current document. The parsed page itself is skipped.
#
# Return values:
#        < 0:    error, returned as ( <rc>, <message> )
#         0:    append page to Current Document
#         1:    skip page
#         2:    first page / new document
#        (following input returns skip and new document combined, 1 | 2)
#-----------------------------------------------------------------------
sub afp2web(){
    my $iPgIdTmp = $a2wPagePar->getParseId();
    if ( $bLog == $TRUE ){
        $theLoggerLocal->logFunctionName( "main", "afp2web" );
        $theLoggerLocal->logMessage( "Id=$iPgIdTmp" );
    }

    #---- Create and add pages for current document ----#
    my $iRetTmp = $APPEND;
    #---- First input
    if ( $bFirstInput == $TRUE ){
        # $V101 Begin
        #---- Write page info (width, height) in cache file ----#
        #
        # Cache filename syntax:
        # <page id>.tif.<width in output resolution>.<height in output resolution>
        #
        #---- Collect page info
        my $iPRTmp = $a2wPagePar->getResolution();
        my $iPWTmp = int( ( $a2wPagePar->getWidth() / $iPRTmp ) * $iOutputResolution );
        my $iPHTmp = int( ( $a2wPagePar->getHeight() / $iPRTmp ) * $iOutputResolution );

        #---- Create cache file (empty, the page info is carried by the filename)
        my $sFNTmp = $sOutputFilePath . $iPgIdTmp . ".tif." . $iPWTmp . "." . $iPHTmp;
        my $fhPgInfoTmp = undef;
        my $iFOTmp = open( $fhPgInfoTmp, '>', $sFNTmp );

        # Assert open result before anything else, closing a handle that failed
        # to open would overwrite $! and hide the real reason
        if ( !$iFOTmp ){ return ( -1, "Unable to cache page $iPgIdTmp info in file $sFNTmp. rc=$iFOTmp reason=" . $! ); }
        close( $fhPgInfoTmp );
        # $V101 End
    }
    #---- Following input
    else {
        $iRetTmp = $SKIP | $NEWDOC;

        #---- Get document pages list
        my @arrPagesTmp = @{ $hrefDocument->{ 'pages' } };
        if ( $bLog == $TRUE ){ $theLoggerLocal->logMessage( "Pages=@arrPagesTmp" ); }

        my $iSeqIdTmp = 0;
        my $iImgWTmp  = 0;
        my $iImgHTmp  = 0;

        my $a2wPageTmp = undef;
        my $sTIFFFilenameTmp = "";
        my $sFQTIFFFilenameTmp = "";
        foreach my $sPageIdTmp ( @arrPagesTmp ){
            #---- Evaluate and assert for page output file
            $sTIFFFilenameTmp = $sPageIdTmp . ".tif";
            $sFQTIFFFilenameTmp = $sOutputFilePath . $sTIFFFilenameTmp;
            if ( !(-e $sFQTIFFFilenameTmp) ){ return ( -1, "Page $sPageIdTmp ($sFQTIFFFilenameTmp) output missing for document " . $hrefDocument->{ 'filename' } ); }

            #---- Create page
            $a2wPageTmp = a2w::Page->new();
            if ( !defined( $a2wPageTmp ) ){ return ( -2, "Unable to create page $sPageIdTmp for $sTIFFFilenameTmp" ); }

            # $V101 Begin
            # Page size as cached during first input (in output resolution)
            $iImgWTmp = $hrefPageInfo->{ $sPageIdTmp }{ 'W' };
            $iImgHTmp = $hrefPageInfo->{ $sPageIdTmp }{ 'H' };
            # $V101 End

            #---- Format page
            $a2wPageTmp->setResolution( $PAGE_RESOLUTION );
            $a2wPageTmp->setWidth( $iImgWTmp );
            $a2wPageTmp->setHeight( $iImgHTmp );

            #---- Add image
            # Filename is given without path, initialize sets the output path as object container path
            $a2wPageTmp->addImage( # Filename
                                     $sTIFFFilenameTmp
                                   # X position
                                   , 0
                                   # Y position
                                   , 0
                                   # Width
                                   , $iImgWTmp
                                   # Height
                                   , $iImgHTmp
                                   # Rotation
                                   , 0
                                 );
            if ( $bLog == $TRUE ){ $theLoggerLocal->logMessage( "img>$sTIFFFilenameTmp<@(0,0), W=$iImgWTmp, H=$iImgHTmp, R=0" ); }

            #---- Add page to document
            $iSeqIdTmp++;
            $a2wDocumentPar->addPage( $a2wPageTmp, $iSeqIdTmp );
        }
    }

    return $iRetTmp;
}

#-----------------------------------------------------------------------
# FinalizePage for each page
#
# Only logs the function name when logging is turned on
#
# Returns 0 always
#-----------------------------------------------------------------------
sub finalizePage(){
    $theLoggerLocal->logFunctionName( "main", "finalizePage" ) if ( $bLog == $TRUE );

    return 0;
}

#-----------------------------------------------------------------------
# FinalizeDoc for each document
#
# On following input:
#   - marks the current document context as found, when the document has pages
#   - deletes the TIFF page outputs (<page id>.tif) of the document, where they
#     exist, and marks every page id of the document as used in %hshPages
#
# Cleanup errors are only logged, they never fail the document
#
# Returns 0 always
#-----------------------------------------------------------------------
sub finalizeDoc(){
    $theLoggerLocal->logFunctionName( "main", "finalizeDoc" ) if ( $bLog == $TRUE );

    #---- Nothing to clean up on first input
    if ( $bFirstInput != $FALSE ){ return 0; }

    #---- Mark document as found
    if ( $a2wDocumentPar->getPageCount() > 0 ){
        $hrefDocument->{ 'found' } = $TRUE;
    }

    #---- Get document pages list
    my @arrDocPagesTmp = @{ $hrefDocument->{ 'pages' } };
    foreach my $sIdTmp ( @arrDocPagesTmp ){
        my $sPageFileTmp = $sOutputFilePath . $sIdTmp . ".tif";

        # $V102 Begin
        #---- Delete page output file, only if it exists (same page id may be listed more than once)
        if ( -e $sPageFileTmp ){
            my ( $iRcTmp, $sReasonTmp ) = a2w::core::file::Utils::deleteFile( $sPageFileTmp );

            # Ignore cleanup errors, when transformation is successful (just log them)
            if ( $bLog == $TRUE ){
                my $sLogTmp = ( $iRcTmp < 0 )
                              ? "Could not delete $sPageFileTmp page of document " . $hrefDocument->{ 'filename' } . ". Reason: " . $sReasonTmp
                              : "Deleted $sPageFileTmp page of document " . $hrefDocument->{ 'filename' };
                $theLoggerLocal->logMessage( $sLogTmp );
            }
        }
        # $V102 End

        #---- Mark page as used
        $hshPages{ $sIdTmp } = $TRUE;
    }

    return 0;
}

#-----------------------------------------------------------------------
# Finalize once per process
#
# Does its work only when the transformation exit status is >= 0:
#   - First input: deletes the script argument JSON file
#   - Following input: asserts that every document context was found and
#     deletes the TIFF page outputs that no document used
#
# Cleanup errors are only logged, they never fail the transformation
#
# Returns 0 on success, else ( -1, <message> ) when documents are missing.
# Missing documents are reported by their 'id' value or, when a document has
# none, by their 1 based position in the extractdoc arguments.
#-----------------------------------------------------------------------
sub finalize(){

    if ( $bLog == $TRUE ){
        $theLoggerLocal->logFunctionName( "main", "finalize" );
    }

    my $iTransformationErrCodeTmp = $a2wKernelPar->getExitStatus();

    # Assert transformation is successful
    if ( $iTransformationErrCodeTmp >= 0 ){
        if ( $bFirstInput == $TRUE ){
            #---- Delete script argument JSON file
            my ( $iDelTmp, $sMsgTmp ) = a2w::core::file::Utils::deleteFile( $sSAJSONFile );
            # $V102 Begin
            # Ignore cleanup errors, when transformation is successful
            if ( $iDelTmp < 0 && $bLog == $TRUE ){ $theLoggerLocal->logMessage( $sMsgTmp ); }
            # $V102 End
        }
        else {
            #---- Assert whether all documents were extracted or not
            my @arrDocsTmp = @{ $arefExtractDocArgs };

            # Collect missing documents along with their id. The id falls back to
            # the document position, so the message names the documents even when
            # the arguments carry no 'id' value
            my @arrMissingDocsTmp = ();
            foreach my $iIdxTmp ( 0 .. $#arrDocsTmp ){
                my $hrefDocTmp = $arrDocsTmp[ $iIdxTmp ];
                if ( $hrefDocTmp->{ 'found' } == $FALSE ){
                    my $iDocIdTmp = defined( $hrefDocTmp->{ 'id' } ) ? $hrefDocTmp->{ 'id' } : ( $iIdxTmp + 1 );
                    push( @arrMissingDocsTmp, { 'ID' => $iDocIdTmp, 'DOC' => $hrefDocTmp } );
                }
            }

            if ( @arrMissingDocsTmp > 0 ){
                local $" = ',';
                @arrMissingDocsTmp = sort { $a->{ 'ID' } <=> $b->{ 'ID' } } @arrMissingDocsTmp;
                my @arrIdsTmp = map { $_->{ 'ID' } } @arrMissingDocsTmp;

                my $sMsgTmp = "Pages " . $arrMissingDocsTmp[ 0 ]->{ 'DOC' }->{ 'pages' }->[ 0 ] . "-$iLastPageId where missing for document(s) @arrIdsTmp";

                if ( $bLog == $TRUE ){ $theLoggerLocal->logMessage( $sMsgTmp ); }
                return ( -1, $sMsgTmp );
            }

            #---- Delete unused page outputs
            my @arrUnusedPageIdTmp = sort keys( %hshPages );
            @arrUnusedPageIdTmp = grep { $hshPages{ $_ } == $FALSE } @arrUnusedPageIdTmp;
            my $iDelTmp = 0;
            my $sMsgTmp = "";
            my $sTIFFFilenameTmp = "";
            if ( $bLog == $TRUE ){ $theLoggerLocal->logMessage( "Deleting unused pages @arrUnusedPageIdTmp" ); }
            foreach my $sPageIdTmp ( @arrUnusedPageIdTmp ){
                #---- Evaluate and assert for page output file
                $sTIFFFilenameTmp = $sOutputFilePath . $sPageIdTmp . ".tif";

                #---- Delete page output file
                # $V102 Begin
                # Delete file only if it exists
                # NOTE: Page usage is kept in the hash %hshPages (not in a hash reference)
                if ( !(-e $sTIFFFilenameTmp) ){ $hshPages{ $sPageIdTmp } = $FALSE; next; }
                ( $iDelTmp, $sMsgTmp ) = a2w::core::file::Utils::deleteFile( $sTIFFFilenameTmp );

                # Ignore cleanup errors, when transformation is successful
                if ( $bLog == $TRUE ){
                    if ( $iDelTmp < 0 ){
                        $theLoggerLocal->logMessage( "Could not delete unused $sTIFFFilenameTmp page of document " . $hrefDocument->{ 'filename' } . ". Reason: " . $sMsgTmp );
                    }
                    else {
                        $theLoggerLocal->logMessage( "Deleted unused $sTIFFFilenameTmp page of document " . $hrefDocument->{ 'filename' } );
                    }
                }
                # $V102 End
                $hshPages{ $sPageIdTmp } = $FALSE;
            }
        }
    }

    return 0;
}

#-----------------------------------------------------------------------
# Process and Validate arguments
#
# Process and Validate extract document JSON arguments
#
# Parameter:
#   1. Document arguments (of type array reference), each document being a hash
#      reference with 'pages' (comma separated page ids) and 'filename' values
#
# For each valid document the following values are set on the document:
#   'id'           : 1 based position of document (kept as is, when already set)
#   'pages'        : array reference of page ids, in given order
#   'pagecount'    : number of page ids
#   'sorted_pages' : array reference of page ids, sorted numerically
#
# Updates the globals $iMaxPages (largest page count of a document),
# $iFirstPageId/$iLastPageId (lowest/highest page id over all documents) and
# $arefExtractDocArgs (reference to array of documents).
#
# A document is invalid, when it is not a hash, when 'pages' is missing, empty
# or has no page id at all (like ","), or when 'filename' is missing or empty.
#
# Returns >=0 on valid arguments else <0, error message
#
#-----------------------------------------------------------------------
sub _processAndValidateArguments{

    if ( $bLog == $TRUE ){
        $theLoggerLocal->logFunctionName( "main", "_processAndValidateArguments" );
    }

    #---- Get parameter
    #
    # 1. Document arguments (of type array reference)
    #
    my $arefDocsPar = shift;

    #---- Iterate through document arguments and validate them
    my $iIdTmp = 0;
    my @arrMissingPGTmp = (); # list of document (whose pages argument is missing) ids
    my @arrMissingFNTmp = (); # list of document (whose filename argument is missing) ids

    my $iPgCntTmp = 0;
    my @arrDocsTmp = @{ $arefDocsPar };
    foreach my $d ( @arrDocsTmp ){
        $iIdTmp++;

        #---- Assert document is a hash, anything else can not have pages
        if ( ref( $d ) ne "HASH" ){
            push( @arrMissingPGTmp, $iIdTmp );
            next;
        }

        #---- Set document id, finalize reports missing documents using it
        if ( !defined( $d->{ 'id' } ) ){ $d->{ 'id' } = $iIdTmp; }

        #---- Assert pages, filename values
        if ( !defined( $d->{ 'pages' } ) || length( $d->{ 'pages' } ) <= 0 ){
            push( @arrMissingPGTmp, $iIdTmp );
            next;
        }
        if ( !defined( $d->{ 'filename' } ) || length( $d->{ 'filename' } ) <= 0 ){
            push( @arrMissingFNTmp, $iIdTmp );
            next;
        }

        my @arrPagesTmp = split /,/, $d->{ 'pages' }; # convert string to array of page ids

        # A value having separators only (like ",") has no page id, report it as
        # empty instead of leaving 'pages' as string for the following callbacks
        if ( @arrPagesTmp <= 0 ){
            push( @arrMissingPGTmp, $iIdTmp );
            next;
        }

        $iPgCntTmp = @arrPagesTmp;
        $d->{ 'pages' } = \@arrPagesTmp;
        $d->{ 'pagecount' } = $iPgCntTmp;

        my @arrSortedPagesTmp = sort { $a <=> $b } @arrPagesTmp;
        $d->{ 'sorted_pages' } = \@arrSortedPagesTmp;

        #---- Update largest page count and lowest/highest page id
        if ( $iPgCntTmp > $iMaxPages ){ $iMaxPages = $iPgCntTmp; }
        if ( $iFirstPageId > $arrSortedPagesTmp[ 0 ] ){ $iFirstPageId = $arrSortedPagesTmp[ 0 ]; }
        if ( $iLastPageId < $arrSortedPagesTmp[ $#arrSortedPagesTmp ] ){ $iLastPageId = $arrSortedPagesTmp[ $#arrSortedPagesTmp ]; }
    }

    #---- Assert arguments for mandatory fields
    my $iValidTmp = 0;
    my $sSeparatorTmp = "";
    my $sMsgTmp = "";
    if ( @arrMissingPGTmp > 0 ){
        $iValidTmp = -1;
        local $" = ',';
        $sMsgTmp .= "'pages' argument is missing or empty for document(s) @arrMissingPGTmp";
        $sSeparatorTmp = ", ";
    }
    if ( @arrMissingFNTmp > 0 ){
        $iValidTmp = -1;
        local $" = ',';
        $sMsgTmp .= $sSeparatorTmp . "'filename' argument is missing or empty for document(s) @arrMissingFNTmp";
    }
    if ( $iValidTmp < 0 ){ return ( $iValidTmp, $sMsgTmp ); }

    #---- Set proper array of documents
    $arefExtractDocArgs = \@arrDocsTmp;

    return $iValidTmp;
}

__END__
