#-------------------------------------------------------------------------------
#  ci.smartfix_extractDocs_sorted_pages.pm:
#
#  Module to create N PDFs for the specified document pages
#
#  Call:
#
#  On Unix:    ./afp2web -q -c -doc_cold -sp:ci.smartfix_extractDocs_sorted_pages.pm -sa:extractdoc_args.json samples/afpsample.afp
#
#  where
#  extractdoc_args.json must have array of document contexts as given below
#  [
#      { "pages":"1,2,3,4,5,6", "filename":"ci.smartfix_2017-06-23_11_111843-0000-0001" }
#    , { "pages":"7,8,9", "filename":"..." }
#  ]
#
#  Author    : Fa. Maas
#  Copyright : (C) 2016-2017 by Maas Holding GmbH
#
#  $V100   2016-03-04    Initial Release
#
#  $V101   2017-04-03    Extended to set proper PDFDocLimits based on extracted pages count to avoid
#                        "Too many pages: XXX. Increase limit by mpdf_open()" error
#                        JiRa: OTS-1164
#                        Extended by: Panneer
#
#  $V102   2017-06-28    Extended to extract all documents at once
#                        JiRa: OTS-1498
#                        Extended by: Panneer
#
#  $V103   2017-07-12    Extended to report an error immediately upon finding the first error
#                        JiRa: OTS-1556
#                        Extended by: Panneer
#
#  $V104   2018-01-19    Bug:
#                        Extracting document failed with following error
#                        E088: Scripting Facility Error (rc=-1): Unable to delete file /mnt/transfer/tmp/oxs-smartfix-server/171211153203435-73215/2.tif rc=0
#                        reason=No such file or directory, rc=-88, ScriptArgs=[{"pages":"2,2","filename":"ci.smartfix_2017-12-10_99_000073-0000-0000"}] ...
#
#                        Reason:
#                        "pages" entry in script argument should have unique page id, but in given transformation page 2 is listed twice.
#                        Scripting facility cleans up extracted TIFF pages after adding them on output based on "pages" list. When same
#                        page id is listed more than once, deleting that page second time gives error (as it is deleted already)
#
#                        Fix:
#                        a. It does not make sense to abort a transformation when cleaning up things at the end of it when that transformation was successful.
#                           So, ignore cleanup errors
#                        b. Delete a file only if it exists
#
#                        JiRa, Customer:
#                        OTS-2033, Hallesche
#
#                        Fixed by:
#                        Panneer
#
#
#-------------------------------------------------------------------------------

#-----------------------------------------------------------------------
# BEGIN block of module
#
# Extends PERL module search path array (@INC) with new element having
# this script modules path in order to have better module portability
#-----------------------------------------------------------------------
BEGIN {
    #---- Directory part of the running script ($0); stays empty when $0
    #     carries no "<dir>/<name>.pm" portion
    my $sModuleDirTmp = ( $0 =~ /(.*)\/.*\.pm/ ) ? $1 : "";

    #---- Optional parent directory of a "sfsamples" folder found in the script path
    my @arrParentDirsTmp = ();
    if ( $sModuleDirTmp eq "" ){
        #---- No directory part available, fall back to the current directory
        $sModuleDirTmp = ".";
    }
    elsif ( $sModuleDirTmp =~ /(.*)\/sfsamples/ && $1 ne "" ){
        @arrParentDirsTmp = ( $1 );
    }

    #---- Prepend all module search paths in one go.
    #     The order of this list is the resulting lookup order in @INC:
    #     a2w modules first, then the local perl libraries, then the script
    #     directory itself and finally the parent of "sfsamples" (if any)
    unshift( @INC
           , $sModuleDirTmp . "/a2w" # $V102 Change
           , $sModuleDirTmp . "/../../../perl/lib"
           , $sModuleDirTmp . "/../../../perl/site/lib"
           , $sModuleDirTmp . "/perl/lib"
           , $sModuleDirTmp . "/perl/site/lib"
           , $sModuleDirTmp
           , @arrParentDirsTmp
           );
}

use a2w::Config;
use a2w::Document;
use a2w::Font;
use a2w::Index;
use a2w::Kernel;
use a2w::Page;
use a2w::Text;
use a2w::ConfigConstants;
use a2w::DocumentConstants;
use a2w::PageConstants;
use a2w::FontConstants;

use a2w::core::bo::BOPool;
use a2w::core::bo::Constants;
use a2w::core::process::Logger;

#-----------------------------------------------------------------------
# Initialize once per process
#-----------------------------------------------------------------------
sub initialize(){
    #
    # Prepares the module level state used by all other callbacks:
    #   - reads the transformation settings from the given config/kernel
    #   - loads and decodes the extractdoc arguments JSON file named by the
    #     script argument
    #   - validates/sorts the document contexts (_processAndValidateArguments)
    #   - raises the first PDFDocLimits value to the highest page count found
    #     among the requested documents
    #
    # Parameter : a2w::Config, a2w::Kernel
    # Returns   : 0 on success, else ( <code>, <message> ) where <code> is
    #               -1  arguments JSON file could not be opened
    #               -2  arguments JSON file could not be parsed or does not
    #                   hold an array of document contexts
    #               -3  arguments failed validation
    #

    #---- Get Parameter of initialize( Par: a2w::Config, a2w::Kernel )
    ( $a2wConfigPar, $a2wKernelPar ) = @_;

    #---- Define boolean values
    $TRUE  = 1;    # TRUE  boolean value
    $FALSE = 0;    # FALSE boolean value

    #---- Set/Reset Logging (active when the logging level contains "sf")
    $bLog = $FALSE;
    if (index( lc($a2wConfigPar->getAttribute( $a2w::ConfigConstants::LOGGINGLEVEL )), "sf") >= 0 ){
        $bLog = $TRUE;
    }

    #Path Separator
    $PATH_SEP = "/";

    my $sScriptProcTmp = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::SCRIPTPROCEDURE );
    $sScriptArgs       = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::SCRIPTARGUMENT );
    $sOutputFilePath   = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::OUTPUTFILEPATH );
    $sOutputFormat     = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::OUTPUTFORMAT ); # $V102 Change
    $sLogFilePath      = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::LOGPATH );
    $sPDFDocLimits     = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::PDFDOCLIMITS );
    $sSpoolFilename    = $a2wKernelPar->getSpoolFilename();

    #---- Get global BO pool
    $theBOPoolLocal = a2w::core::bo::BOPool::getBOPool();

    #---- Get logger
    $theLoggerLocal = $theBOPoolLocal->getLogger();

    if ( $bLog == $TRUE ){
        #---- List of modules to be logged
        my $sLogModuleListTmp = ""
                                . "main"
                                ;

        #---- Register this module to log
        $theLoggerLocal->registerClasses( $sLogModuleListTmp );

        $bLog = $theLoggerLocal->isRegistered( "main" );

        #---- Open logger
        if ( $sLogModuleListTmp ne "" ){
            $theLoggerLocal->open( $sLogFilePath, "ci.smartfix" );
        }
    }

    if ( $bLog == $TRUE ){
        $theLoggerLocal->logFunctionName( "main", "initialize" );
        $theLoggerLocal->logMessage( "Running " . $sScriptProcTmp . "..." );
        $theLoggerLocal->logMessage( "Processing " . $sSpoolFilename );
        $theLoggerLocal->logMessage( "Args="
                                     . $sScriptArgs
                                     . ", OutputFilePath: " . $sOutputFilePath
                                   );
    }

    #---- Page process flags
    $APPEND   = 0;    # append page to current document
    $SKIP     = 1;    # skip page
    $NEWDOC   = 2;    # new document
    $LASTPAGE = 4;    # last page of current document # $V102 Change

    # $V102 Begin
    #---- Evaluate output extension (first three characters of the output format, lower case)
    $sOutputExtension = "." . substr( lc( $sOutputFormat ), 0, 3 );

    #---- Load extractdoc arguments JSON
    #     Three argument open with a lexical handle: the filename is taken
    #     literally and can not inject an open mode or a pipe
    $sSAJSONFile = $sScriptArgs;
    my $iRcTmp = open( my $fhJSONTmp, "<", $sSAJSONFile );
    if ( !$iRcTmp ){
        return ( -1, "Unable to open extractdoc arguments JSON file $sSAJSONFile. rc=$iRcTmp reason=" . $! );
    }
    my $sExtractDocArgsTmp = do { local $/; <$fhJSONTmp> };
    close( $fhJSONTmp );

    #---- Decode extractdoc arguments
    $arefExtractDocArgs = undef;
    eval{
        require JSON::Tiny;
        $arefExtractDocArgs = JSON::Tiny::from_json( $sExtractDocArgsTmp );
    };
    if ( $@ ){
        return ( -2, "Unable to parse extractdoc arguments JSON file $sSAJSONFile. rc=$iRcTmp reason=" . $@ );
    }
    elsif ( !defined( $arefExtractDocArgs ) ){
        return ( -2, "Unable to parse extractdoc arguments JSON file $sSAJSONFile. rc=$iRcTmp" );
    }
    elsif ( ref( $arefExtractDocArgs ) ne "ARRAY" ){
        # Everything below dereferences the decoded value as an array, so a
        # JSON object or scalar at top level is rejected here instead of
        # aborting later with a dereference error
        return ( -2, "Unable to parse extractdoc arguments JSON file $sSAJSONFile. Expected an array of document contexts" );
    }
    if ( $bLog == $TRUE ){
        $theLoggerLocal->logMessage( "ExtractDoc Arguments: $arefExtractDocArgs" );
        my @arrDocsTmp = @{ $arefExtractDocArgs };
        my $iIdxTmp = 0;
        my $sIdxTmp = 0;
        foreach my $e ( @arrDocsTmp ){
            $iIdxTmp++;
            $sIdxTmp = sprintf( "%04d", $iIdxTmp );
            if ( !ref( $e ) ){
                # plain (non reference) array element
                $theLoggerLocal->logMessage( "$sIdxTmp:>" . $e );
            }
            elsif ( ref( $e ) eq "ARRAY" ){
                my @arrTmp = @{ $e };
                $theLoggerLocal->logMessage( "$sIdxTmp:>arr(@arrTmp)<" );
            }
            elsif ( ref( $e ) eq "HASH" ){
                my @arrKeysTmp = sort keys %{ $e };
                foreach my $k ( @arrKeysTmp ){
                    $theLoggerLocal->logMessage( "$sIdxTmp:$k>" . $e->{ $k } . "<" );
                }
            }
        }
    }

    #---- Validate extractdoc arguments
    $iMaxPages = 0;
    $iFirstPageId = 0; # $V103 Change
    $iLastPageId = 0;  # $V103 Change

    # NOTE: _processAndValidateArguments will update $iMaxPages and $arefExtractDocArgs values
    my ( $iRetTmp, $sMsgTmp ) = _processAndValidateArguments( $arefExtractDocArgs );
    if ( $iRetTmp < 0 ){
        return ( -3, $sMsgTmp );
    }

    #---- Initialize document info
    $iDocId = 0;
    %hshPageIds = ();
    %hshPageFound = ();
    $hrefDocument = undef;
    # $V102 End

    # $V101 Begin
    #---- Page count
    if ( $bLog == $TRUE ){
        $theLoggerLocal->logMessage( "PDFDocLimits=$sPDFDocLimits" );
        $theLoggerLocal->logMessage( "PageCount=$iMaxPages" );
    }

    #---- Process and update pdf document limits
    my @arrPDFDocLimitsTmp = ( $sPDFDocLimits );
    if ( index( $sPDFDocLimits, "," ) > 0 ){ # PDFDocLimits multi value format
        #---- Split and get pdf document limits
        @arrPDFDocLimitsTmp = split /,/, $sPDFDocLimits;
    }

    #---- First value is treated as the page limit
    my $iPageLimitTmp = $arrPDFDocLimitsTmp[ 0 ];

    if ( $iPageLimitTmp < $iMaxPages ){
        #---- Modify page limit based on page count
        $arrPDFDocLimitsTmp[ 0 ] = $iMaxPages;
        $sPDFDocLimits = join( ",", @arrPDFDocLimitsTmp );

        if ( $bLog == $TRUE ){
            $theLoggerLocal->logMessage( "New PDFDocLimits=$sPDFDocLimits" );
        }
        $a2wConfigPar->setAttribute( $a2w::ConfigConstants::PDFDOCLIMITS, $sPDFDocLimits );
    }
    # $V101 End
    return 0;
}

#-----------------------------------------------------------------------
# InitializeDoc for each document
#-----------------------------------------------------------------------
sub initializeDoc(){
    #
    # Activates the next document context from $arefExtractDocArgs:
    # builds the page id => return value map (%hshPageIds) consulted by
    # afp2web(), resets the page found map (%hshPageFound) and sets the
    # output filename of the document.
    #
    # When no document context is left (more documents started than were
    # requested) both maps stay empty, so every page is skipped.
    #
    # Parameter : a2w::Document
    # Returns   : 0
    #

    #---- Get Parameter of initializeDoc( Par: a2w::Document )
    ($a2wDocumentPar) = @_;

    if ( $bLog == $TRUE ){
        $theLoggerLocal->logFunctionName( "main", "initializeDoc" );
        $theLoggerLocal->logMessage( "Id=" . $a2wDocumentPar->getId() );
    }

    # $V102 Begin
    #---- Set current document context ----#
    #---- Get document arguments
    $hrefDocument = $arefExtractDocArgs->[ $iDocId ];
    $iDocId++;

    #---- Get document pages list (empty when there is no context for this document)
    my $bHasContextTmp = ( ref( $hrefDocument ) eq "HASH" ) ? $TRUE : $FALSE;
    my @arrIdsTmp = ();
    if ( $bHasContextTmp == $TRUE && ref( $hrefDocument->{ 'pages' } ) eq "ARRAY" ){
        @arrIdsTmp = @{ $hrefDocument->{ 'pages' } };
    }

    #---- Convert array to hashmap with afp2web return values
    #     All pages are appended, the last listed page closes the document
    %hshPageIds = map { $_ => $APPEND } @arrIdsTmp;
    if ( @arrIdsTmp > 0 ){
        $hshPageIds{ $arrIdsTmp[ $#arrIdsTmp ] } = $LASTPAGE;
    }

    %hshPageFound = map { $_ => $FALSE } @arrIdsTmp;

    #---- Set output filename
    my $sFilenameTmp = "";
    if ( $bHasContextTmp == $TRUE && defined( $hrefDocument->{ 'filename' } ) ){
        $sFilenameTmp = $hrefDocument->{ 'filename' };
    }
    $a2wDocumentPar->setOutputFilename( $sFilenameTmp . $sOutputExtension );
    # $V102 End

    return 0;
}

#-----------------------------------------------------------------------
# InitializePage for each page
#-----------------------------------------------------------------------
sub initializePage(){
    #
    # Remembers the page that is about to be processed so that afp2web()
    # can query it.
    #
    # Parameter : a2w::Page
    # Returns   : 0
    #
    $a2wPagePar = $_[ 0 ];

    $theLoggerLocal->logFunctionName( "main", "initializePage" ) if ( $bLog == $TRUE );

    return 0;
}

#-----------------------------------------------------------------------
# Main entry method
# Return values:
#        < 0:    error
#         0:    append page to current document
#         1:    skip page
#         2:    first page / new document
#         4:    last page of current document
#-----------------------------------------------------------------------
sub afp2web(){
    #
    # Decides what happens with the current page ($a2wPagePar):
    # the page's parse id is looked up in %hshPageIds, ids that are not
    # listed for the current document are skipped. Every inspected id is
    # marked in %hshPageFound.
    #
    # Returns : value stored in %hshPageIds for the page id, else $SKIP
    #
    # $V102 Begin
    my $iPageIdTmp  = $a2wPagePar->getParseId();
    my $bLoggingTmp = ( $bLog == $TRUE );

    if ( $bLoggingTmp ){
        $theLoggerLocal->logFunctionName( "main", "afp2web" );
        $theLoggerLocal->logMessage( "Id=$iPageIdTmp" );
    }

    #---- Evaluate return value
    my $iActionTmp = defined( $hshPageIds{ $iPageIdTmp } )
                   ? $hshPageIds{ $iPageIdTmp }
                   : $SKIP
                   ;
    $theLoggerLocal->logMessage( "Id=$iPageIdTmp a2w rc=$iActionTmp" ) if ( $bLoggingTmp );

    #---- Mark page is found
    $hshPageFound{ $iPageIdTmp } = $TRUE;

    return $iActionTmp;
    # $V102 End
}

#-----------------------------------------------------------------------
# FinalizePage for each page
#-----------------------------------------------------------------------
sub finalizePage(){
    #
    # Nothing to clean up per page, only traces the call when logging is on.
    #
    # Returns : 0
    #
    $theLoggerLocal->logFunctionName( "main", "finalizePage" ) if ( $bLog == $TRUE );

    return 0;
}

#-----------------------------------------------------------------------
# FinalizeDoc for each document
#-----------------------------------------------------------------------
sub finalizeDoc(){
    #
    # Closes the current document: flags its context as found when at least
    # one page was written and asserts that every page listed for the
    # document was seen by afp2web().
    #
    # Returns : 0 on success, else ( -1, <message listing the missing page ids> )
    #

    if ( $bLog == $TRUE ){
        $theLoggerLocal->logFunctionName( "main", "finalizeDoc" );
    }

    # $V102 Begin
    my $iPageCountTmp = $a2wDocumentPar->getPageCount();

    #---- Mark document as found
    if ( $iPageCountTmp > 0 ){ $hrefDocument->{ 'found' } = $TRUE; }

    # Assert whether all pages were extracted or not
    # (page ids are numbers, so they are sorted numerically for the message)
    my @arrIdsTmp = sort { $a <=> $b }
                    grep { $hshPageFound{ $_ } == $FALSE }
                    keys( %hshPageFound );
    # $V103 Begin
    if ( @arrIdsTmp > 0 ){
        local $" = ',';
        my $sPgTmp = ( @arrIdsTmp > 1 ) ? "Pages @arrIdsTmp are" : "Page @arrIdsTmp is";
        my $sMsgTmp = "$sPgTmp missing for document $iDocId";
        if ( $bLog == $TRUE ){ $theLoggerLocal->logMessage( $sMsgTmp ); }
        return ( -1, $sMsgTmp );
    }
    # $V103 End
    # $V102 End

    return 0;
}

#-----------------------------------------------------------------------
# Finalize once per process
#-----------------------------------------------------------------------
sub finalize(){
    #
    # Final check after a transformation that did not fail (exit status >= 0):
    # asserts that every requested document was found and, when so, removes
    # the script argument JSON file. Failing to remove that file is only
    # logged, it never turns a successful transformation into an error.
    #
    # Returns : 0 on success, else ( -1, <message listing the missing documents> )
    #

    if ( $bLog == $TRUE ){
        $theLoggerLocal->logFunctionName( "main", "finalize" );
    }

    # $V102 Begin
    my $iTransformationErrCodeTmp = $a2wKernelPar->getExitStatus();

    # Assert transformation is successful
    if ( $iTransformationErrCodeTmp >= 0 ){
        # Assert whether all documents were extracted or not
        my @arrDocsTmp = @{ $arefExtractDocArgs };
        my @arrMissingDocsTmp = grep { $_->{ 'found' } == $FALSE } @arrDocsTmp;

        if ( @arrMissingDocsTmp > 0 ){
            local $" = ',';
            @arrMissingDocsTmp = sort { $a->{ 'id' } <=> $b->{ 'id' } } @arrMissingDocsTmp; # $V103 Change
            my @arrIdsTmp = map { $_->{ 'id' } } @arrMissingDocsTmp;

            # $V103 Begin
            # Range starts at the first page of the first missing document
            my $sMsgTmp = "Pages " . $arrMissingDocsTmp[ 0 ]->{ 'pages' }->[ 0 ] . "-$iLastPageId where missing for document(s) @arrIdsTmp";
            # $V103 End

            if ( $bLog == $TRUE ){ $theLoggerLocal->logMessage( $sMsgTmp ); }
            return ( -1, $sMsgTmp );
        }

        #---- Delete script argument JSON file
        # $V104 Begin
        # Delete file only if it exists
        if ( -e $sSAJSONFile ){
            my $iRcTmp = unlink( $sSAJSONFile );
            if ( $iRcTmp <= 0 ){
                # Keep the reason before any further call can overwrite $!
                my $sReasonTmp = "" . $!;
                if ( $bLog == $TRUE ){ $theLoggerLocal->logMessage( "Unable to delete file " . $sSAJSONFile . " rc=" . $iRcTmp . " reason=" . $sReasonTmp ); }

                # Ignore cleanup errors, when transformation is successful
            }
        }
        # $V104 End
    }
    # $V102 End

    return 0;
}

#-----------------------------------------------------------------------
# Process and Validate arguments
#
# Process and Validate extract document JSON arguments
# Returns >=0 on valid arguments else <0, error message
#
#-----------------------------------------------------------------------
sub _processAndValidateArguments{
    #
    # Processes and validates the decoded extractdoc arguments.
    #
    # For each document context the comma separated "pages" string is turned
    # into a numerically sorted array (surrounding blanks of each page id are
    # removed) and "pagecount" is added. Contexts are then sorted by their
    # first page id, numbered ("id") and flagged as not found ("found").
    #
    # Updates the globals $iMaxPages, $iFirstPageId, $iLastPageId and, on
    # success, $arefExtractDocArgs (sorted array of document contexts).
    #
    # Parameter : Document arguments (of type array reference)
    # Returns   : 0 on valid arguments, else ( -1, <error message> ) when
    #             - "pages" is missing or empty, or "filename" is missing
    #               (an entry that is not a hash counts as missing both)
    #             - a document ends after the start page of the next document
    #

    if ( $bLog == $TRUE ){
        $theLoggerLocal->logFunctionName( "main", "_processAndValidateArguments" );
    }

    #---- Get parameter
    #
    # 1. Document arguments (of type array reference)
    #
    my $arefDocsPar = shift;

    #---- Iterate through document arguments and validate them
    my $iIdTmp = 0;
    my @arrMissingPGTmp = (); # list of document (whose pages argument is missing) ids
    my @arrMissingFNTmp = (); # list of document (whose filename argument is missing) ids
    my $iMinFirstPgIdTmp = undef; # lowest first page id over all documents

    my @arrDocsTmp = @{ $arefDocsPar };
    foreach my $d ( @arrDocsTmp ){
        $iIdTmp++;

        #---- An entry that is not a document context has neither pages nor filename
        if ( ref( $d ) ne "HASH" ){
            push( @arrMissingPGTmp, $iIdTmp );
            push( @arrMissingFNTmp, $iIdTmp );
            next;
        }

        #---- Convert string to array of page ids
        #     Blanks around a page id are removed, otherwise " 2" would never
        #     match the page id looked up while processing the pages
        my @arrPagesTmp = ();
        if ( defined( $d->{ 'pages' } ) ){
            @arrPagesTmp = map { my $p = $_; $p =~ s/^\s+//; $p =~ s/\s+$//; $p }
                           split( /,/, $d->{ 'pages' } );
            @arrPagesTmp = sort { $a <=> $b } @arrPagesTmp;
        }

        if ( @arrPagesTmp == 0 ){
            # no pages given at all or empty page list
            push( @arrMissingPGTmp, $iIdTmp );
        }
        else {
            $d->{ 'pages' } = \@arrPagesTmp;
            my $iPgCntTmp = @arrPagesTmp;
            $d->{ 'pagecount' } = $iPgCntTmp;

            if ( $iPgCntTmp > $iMaxPages ){ $iMaxPages = $iPgCntTmp; }
            # $V103 Begin
            if (    !defined( $iMinFirstPgIdTmp )
                 || $arrPagesTmp[ 0 ] < $iMinFirstPgIdTmp
               ){
                $iMinFirstPgIdTmp = $arrPagesTmp[ 0 ];
            }
            if ( $iLastPageId < $arrPagesTmp[ $#arrPagesTmp ] ){ $iLastPageId = $arrPagesTmp[ $#arrPagesTmp ]; }
            # $V103 End
        }

        if ( !defined( $d->{ 'filename' } ) ){
            push( @arrMissingFNTmp, $iIdTmp );
        }
    }
    if ( defined( $iMinFirstPgIdTmp ) ){ $iFirstPageId = $iMinFirstPgIdTmp; } # $V103 Change

    #---- Assert arguments for mandatory fields
    my $iValidTmp = 0;
    my $sSeparatorTmp = "";
    my $sMsgTmp = "";
    if ( @arrMissingPGTmp > 0 ){
        $iValidTmp = -1;
        local $" = ',';
        $sMsgTmp .= "'pages' argument is missing for document(s) @arrMissingPGTmp";
        $sSeparatorTmp = ", ";
    }
    if ( @arrMissingFNTmp > 0 ){
        $iValidTmp = -1;
        local $" = ',';
        $sMsgTmp .= $sSeparatorTmp . "'filename' argument is missing for document(s) @arrMissingFNTmp";
    }
    if ( $iValidTmp < 0 ){ return ( $iValidTmp, $sMsgTmp ); }

    #---- Sort out document arguments based on starting page id
    @arrDocsTmp = sort { $a->{ 'pages' }[ 0 ] <=> $b->{ 'pages' }[ 0 ] } @arrDocsTmp;

    #---- Improve document with additional processing attributes (id, found)
    $iIdTmp = 0;
    foreach my $d ( @arrDocsTmp ){ $iIdTmp++; $d->{ 'id' } = $iIdTmp; $d->{ 'found' } = $FALSE; }

    if ( $bLog == $TRUE ){
        $theLoggerLocal->logMessage( "Sorted documents" );
        local $" = ',';
        my $iIdxTmp = 0;
        my $sIdxTmp = 0;
        my @arrPagesTmp = ();

        foreach my $e ( @arrDocsTmp ){
            $iIdxTmp++;
            $sIdxTmp = sprintf( "%04d", $iIdxTmp );
            @arrPagesTmp = @{ $e->{ 'pages' } };

            $theLoggerLocal->logMessage( "$sIdxTmp:pages:>@arrPagesTmp<" );
            $theLoggerLocal->logMessage( "$sIdxTmp:filename:>" . $e->{ 'filename' } . "<" );
        }
    }

    #---- Assert documents for proper sequence
    # $V103 Begin
    for my $i ( 1 .. $#arrDocsTmp ){
        #---- if document A's last page id is greater then document B's first page id, then it is error case
        my $hrefPrevDocTmp = $arrDocsTmp[ $i - 1 ];
        my $hrefCurrDocTmp = $arrDocsTmp[ $i ];
        my $i1stPgIdTmp = $hrefCurrDocTmp->{ 'pages' }[ 0 ];
        my $iEndPgIdTmp = $hrefPrevDocTmp->{ 'pages' }[ -1 ]; # pages are sorted, last entry is the end page
        if ( $iEndPgIdTmp > $i1stPgIdTmp ){
            $iValidTmp = -1;
            $sMsgTmp = "Mismatching end page id (" . $iEndPgIdTmp . ") of document (" . $hrefPrevDocTmp->{ 'filename' } . ") against start page id (" . $i1stPgIdTmp . ") of document (" . $hrefCurrDocTmp->{ 'filename' } . ")";
            last; # break the loop
        }
    }
    # $V103 End
    if ( $iValidTmp < 0 ){ return ( $iValidTmp, $sMsgTmp ); }

    #---- Set proper array of documents
    $arefExtractDocArgs = \@arrDocsTmp;

    return $iValidTmp;
}

__END__
