#-------------------------------------------------------------------------------
#  a2w/core/dm/MiningUtils.pm
#
#  Perl module with data mining APIs
#
#  Author   : Panneer, AFP2web Team
#
#  $V100   2018-10-29    Initial Release
#
#-------------------------------------------------------------------------------
package a2w::core::dm::MiningUtils;

#-----------------------------------------------------------------------
# Include required modules
#-----------------------------------------------------------------------
use a2w::Page;
use a2w::TypeConstants;

use a2w::core::dm::Constants;
use a2w::core::dm::Block;
use a2w::core::dm::Database;

use a2w::core::log::Logger;

#---- Declare constants
# Declared with 'our' so that they remain package variables of this module
# (same names and values as before) while being valid under 'use strict'.
our $TRUE  = $a2w::TypeConstants::TRUE;    # TRUE  boolean value
our $FALSE = $a2w::TypeConstants::FALSE;   # FALSE boolean value

#-----------------------------------------------------------------------
# Constructor
#-----------------------------------------------------------------------

#-----------------------------------------------------------------------
# Destructor
#-----------------------------------------------------------------------

#-----------------------------------------------------------------------
# Mutators
#-----------------------------------------------------------------------

#-----------------------------------------------------------------------
# Accessors
#-----------------------------------------------------------------------

#-----------------------------------------------------------------------
# Workers
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
# findEyecatcher
#
# Find eyecatcher on given content
#
# Parameters:
# content     Array of objects (sorted top to bottom and left to right).
#             Each entry is a hash reference, the attributes evaluated by
#             the search are read from its 'POMOBJ' element
# options     options is an object, it should be defined as follow
#             "options":{
#               'xRange':        {'from':0,'to':100000}, // where xRange, from and to are optional
#               'yRange':        {'from':0,'to':<Y position of last content object>}, // where yRange, from and to are optional
#               'reEC':          <regular expression>,   // eyecatcher regexpr (qr// object or pattern string),
#                                                        // must be defined even for non text eyecatchers
#               'start':         OPTIONAL: specify from which array index the search should start from, default is 0
#               'end':           OPTIONAL: specify up to which array index the search should go, default is maximum of content array
#               'objectType':    Type of eyecatcher object (text, line, vector, image, container). Default is text
#             }
#
# Returns:
#  undef             Invalid parameters or no object matched
#  content entry     Exactly one object matched
#  array reference   Several objects matched (content entries, in content order)
#
#  On the POMOBJ of each matching entry the content index is stored as
#  AT_SEARCH_INDEX and, for text eyecatchers, the regexp group 1 result is
#  stored as AT_SEARCH_RESULT (undef when the pattern has no group)
#
# Remarks:
#  The options are only read. Defaults are applied on local values, so that
#  an options object reused for another content (i.e. another page) does not
#  carry over the 'end' index or the Y range of a previous search
#
# Prototypes:
#  ecIndexName = findEyecatcher(content, options)   where options is as defined above
#
# Example:
#  ecIndexName = findEyecatcher(content,{'reEC' => qr/^Datum.*$/i})
#
# Possible extension
#    - Collect the object within an area, i.e. no eyecatcher but an area (xRange & yRange)
#
#-----------------------------------------------------------------------
sub findEyecatcher{
    #---- Get logger
    my $theLogger = a2w::core::log::Logger->getSingleton();
    my $bLogTmp   = $theLogger->isRegistered( __PACKAGE__ );

    if ( $bLogTmp == $TRUE ){ $theLogger->logFunctionName( __PACKAGE__, "findEyecatcher" ); }

    #---- Get parameters
    #
    my $arefContentPar = shift;
    my $hrefOptionsPar = shift;

    #---- Assert parameters
    # Both parameters must be references ('== undef' would be a numeric comparison)
    unless ( ref( $arefContentPar ) && ref( $hrefOptionsPar ) ){ return undef; }
    my @arrContentTmp = @{ $arefContentPar };
    if ( @arrContentTmp <= 0 ){ return undef; }

    #---- Assert reEC
    # A compiled pattern (qr//) or a plain pattern string is accepted, any
    # other kind of reference can not be used as pattern and is rejected
    my $reECTmp = $hrefOptionsPar->{ 'reEC' };
    unless ( defined( $reECTmp ) ){ return undef; }
    if ( ref( $reECTmp ) && ref( $reECTmp ) ne "Regexp" ){ return undef; }

    #---- Assert options (defaults are kept local, the caller's options are not modified)
    # Assert X range
    my $hrefXRangeTmp = ( ref( $hrefOptionsPar->{ 'xRange' } ) eq "HASH" ) ? $hrefOptionsPar->{ 'xRange' } : {};
    my $fXFromTmp = defined( $hrefXRangeTmp->{ 'from' } ) ? $hrefXRangeTmp->{ 'from' } : 0;
    my $fXToTmp   = defined( $hrefXRangeTmp->{ 'to' } )   ? $hrefXRangeTmp->{ 'to' }   : 100000;

    # Assert Y range
    my $hrefYRangeTmp = ( ref( $hrefOptionsPar->{ 'yRange' } ) eq "HASH" ) ? $hrefOptionsPar->{ 'yRange' } : {};
    my $fYFromTmp = defined( $hrefYRangeTmp->{ 'from' } ) ? $hrefYRangeTmp->{ 'from' } : 0;
    my $fYToTmp   = $hrefYRangeTmp->{ 'to' };
    unless ( defined( $fYToTmp ) ){
        # Default is the Y position of the last content object (content is sorted top to bottom)
        my $hrefLastTmp = $arrContentTmp[ $#arrContentTmp ];
        if ( ref( $hrefLastTmp ) ){
            $fYToTmp = $hrefLastTmp->{ $a2w::core::dm::Constants::AT_ADJ_YPOS };

            # NOTE(review): the search below reads the Y position from 'POMOBJ', so fall
            # back to it when the content entry itself does not carry the attribute
            # (presumably the entry is only a wrapper - confirm against the callers)
            if ( !defined( $fYToTmp ) && ref( $hrefLastTmp->{ 'POMOBJ' } ) ){
                $fYToTmp = $hrefLastTmp->{ 'POMOBJ' }{ $a2w::core::dm::Constants::AT_ADJ_YPOS };
            }
        }
    }

    #---- Assert start index (from which should we start to search)
    my $iStartTmp = defined( $hrefOptionsPar->{ 'start' } ) ? $hrefOptionsPar->{ 'start' } : 0;
    if ( $iStartTmp < 0 ){ $iStartTmp = 0; }

    #---- Assert end index (up to which should we search for)
    my $iEndTmp = defined( $hrefOptionsPar->{ 'end' } ) ? $hrefOptionsPar->{ 'end' } : $#arrContentTmp;
    if ( $iEndTmp > $#arrContentTmp ){ $iEndTmp = $#arrContentTmp; } # never run past the content

    #---- Assert object type
    my %hshObjTypeMapTmp = (
          'text'      => $a2w::core::dm::Constants::OT_TEXT
        , 'line'      => $a2w::core::dm::Constants::OT_LINE
        , 'vector'    => $a2w::core::dm::Constants::OT_VECTOR
        , 'image'     => $a2w::core::dm::Constants::OT_IMAGE
        , 'container' => $a2w::core::dm::Constants::OT_CONTAINER
    );
    my $iObjTypeTmp = $hrefOptionsPar->{ 'objectType' };
    if ( defined( $iObjTypeTmp ) ){
        # Translate a type name, any other value is taken as is (i.e. an already translated type)
        my $iMappedTmp = $hshObjTypeMapTmp{ lc( $iObjTypeTmp ) };
        if ( defined( $iMappedTmp ) ){ $iObjTypeTmp = $iMappedTmp; }
    }
    else {
        $iObjTypeTmp = $a2w::core::dm::Constants::OT_TEXT;
    }

    # Only text eyecatchers are matched against the pattern
    my $bTextECTmp = ( $iObjTypeTmp == $a2w::core::dm::Constants::OT_TEXT );

    #---- Array of matching objects
    my @arrECTmp = ();

    #---- Look for the objects that match the given type, x and y ranges and (for text) the given pattern
    for ( my $i = $iStartTmp; $i <= $iEndTmp; $i++ ){
        my $hrefEntryTmp = $arrContentTmp[ $i ];
        unless ( ref( $hrefEntryTmp ) ){ next; }

        my $pomObjTmp = $hrefEntryTmp->{ 'POMOBJ' };
        unless ( ref( $pomObjTmp ) ){ next; }

        #---- Assert type
        unless ( $pomObjTmp->{ $a2w::core::dm::Constants::AT_OBJTYPE } == $iObjTypeTmp ){ next; }

        #---- Assert position
        my $fYPosTmp = $pomObjTmp->{ $a2w::core::dm::Constants::AT_ADJ_YPOS };
        unless ( $fYPosTmp >= $fYFromTmp && $fYPosTmp <= $fYToTmp ){ next; }

        my $fXPosTmp = $pomObjTmp->{ $a2w::core::dm::Constants::AT_XPOS };
        unless ( $fXPosTmp >= $fXFromTmp && $fXPosTmp <= $fXToTmp ){ next; }

        #---- Assert text value
        if ( $bTextECTmp ){
            unless ( $pomObjTmp->{ $a2w::core::dm::Constants::AT_OBJINFO }{ $a2w::core::dm::Constants::OI_TEXT_VALUE } =~ $reECTmp ){ next; }
            $pomObjTmp->{ $a2w::core::dm::Constants::AT_SEARCH_RESULT } = $1; # store regexp group 1 result
        }

        $pomObjTmp->{ $a2w::core::dm::Constants::AT_SEARCH_INDEX } = $i;      # store index of object in search iterator
        push( @arrECTmp, $hrefEntryTmp );
    }

    #if ( $bLogTmp == $TRUE ){ $theLogger->logMessage( "Eyecatcher(s):" ); }
    #if ( $bLogTmp == $TRUE ){ $theLogger->logHashMessage( { 'EC' => \@arrECTmp } ); }

    #---- Evaluate search results
    if ( @arrECTmp <= 0 ){ return undef; }
    if ( @arrECTmp == 1 ){ return $arrECTmp[ 0 ]; }
    return \@arrECTmp;
}

#-----------------------------------------------------------------------
# findEyecatcherValue
#
# Find eyecatcher value on given content
#
# The eyecatcher is searched first (see findEyecatcher), then the value is
# searched relative to the position of the eyecatcher in the given direction
#
# Parameters:
# content     Array of objects (sorted top to bottom and left to right)
# options     options is an object, it should be defined as follow
#             "options":{
#               'xRange':        {'from':0,'to':100000}, // where xRange, from and to are optional (applies to the eyecatcher search)
#               'yRange':        {'from':0,'to':<Y position of last content object>}, // where yRange, from and to are optional (applies to the eyecatcher search)
#               'reEC':          <regular expression>,   // eyecatcher regexpr
#               'reECValue':     <regular expression>,   // eyecatcher value regexpr (qr// object or pattern string)
#               'start':         OPTIONAL: specify from which array index the search should start from, default is 0
#               'end':           OPTIONAL: specify up to which array index the search should go, default is maximum of content array
#               'objectType':    Type of eyecatcher object (text, line, vector, image, container). Default is text
#               'direction':     Search direction for the eyecatcher value (right, left, top and bottom). Default is right
#               'xTolerance':    OPTIONAL: Specify adjustment in X range to search value. Default is 1
#               'yTolerance':    OPTIONAL: Specify adjustment in Y range to search value. Default is 1
#             }
#
# Returns:
#  Result of the value search, as returned by findEyecatcher (undef, one
#  content entry or an array reference of content entries). undef is also
#  returned on invalid parameters or when the eyecatcher itself is not found
#
# Prototypes:
#  ecIndexValue = findEyecatcherValue(content, options)   where options is as defined above
#
# Example:
#  ecIndexValue = findEyecatcherValue(content,{'reEC' => qr/^Datum.*$/i, 'reECValue' => qr/^(\d{2}.\d{2}.\d{4})$/i})
#
#-----------------------------------------------------------------------
sub findEyecatcherValue{
    #---- Get logger
    my $theLogger = a2w::core::log::Logger->getSingleton();
    my $bLogTmp   = $theLogger->isRegistered( __PACKAGE__ );

    if ( $bLogTmp == $TRUE ){ $theLogger->logFunctionName( __PACKAGE__, "findEyecatcherValue" ); }

    #---- Get parameters
    #
    my $arefContentPar = shift;
    my $hrefOptionsPar = shift;

    #---- Assert parameters
    # Both parameters must be references ('== undef' would be a numeric comparison)
    unless ( ref( $arefContentPar ) && ref( $hrefOptionsPar ) ){ return undef; }

    #---- Assert reECValue
    # A compiled pattern (qr//) or a plain pattern string is accepted, any
    # other kind of reference can not be used as pattern and is rejected
    my $reECValueTmp = $hrefOptionsPar->{ 'reECValue' };
    unless ( defined( $reECValueTmp ) ){ return undef; }
    if ( ref( $reECValueTmp ) && ref( $reECValueTmp ) ne "Regexp" ){ return undef; }

    #---- First find the eyecatcher
    my $refECTmp = a2w::core::dm::MiningUtils::findEyecatcher( $arefContentPar, $hrefOptionsPar );
    unless ( defined( $refECTmp ) ){ return undef; }

    #---- Assert direction
    # The map must be a hash (declared as scalar, the direction names were never translated)
    my %hshDirMapTmp = (
          'right'  => $a2w::core::dm::Constants::EYECATCHER_SEARCH_DIR_RIGHT
        , 'left'   => $a2w::core::dm::Constants::EYECATCHER_SEARCH_DIR_LEFT
        , 'top'    => $a2w::core::dm::Constants::EYECATCHER_SEARCH_DIR_TOP
        , 'bottom' => $a2w::core::dm::Constants::EYECATCHER_SEARCH_DIR_BOTTOM
    );
    my $iDirectionTmp = $hrefOptionsPar->{ 'direction' };
    if ( defined( $iDirectionTmp ) ){
        # Translate a direction name, any other value is taken as is (i.e. an already translated direction)
        my $iMappedTmp = $hshDirMapTmp{ lc( $iDirectionTmp ) };
        if ( defined( $iMappedTmp ) ){ $iDirectionTmp = $iMappedTmp; }
    }
    else {
        $iDirectionTmp = $a2w::core::dm::Constants::EYECATCHER_SEARCH_DIR_RIGHT;
    }

    #---- Assert tolerance options
    my $fXTolTmp = defined( $hrefOptionsPar->{ 'xTolerance' } ) ? $hrefOptionsPar->{ 'xTolerance' } : 1;
    my $fYTolTmp = defined( $hrefOptionsPar->{ 'yTolerance' } ) ? $hrefOptionsPar->{ 'yTolerance' } : 1;

    #---- Build search options for eyecatcher value
    # Ranges and indexes not set below are defaulted by findEyecatcher
    my $hrefOptionsTmp = { 'reEC' => $reECValueTmp };
    if ( defined( $hrefOptionsPar->{ 'objectType' } ) ){ $hrefOptionsTmp->{ 'objectType' } = $hrefOptionsPar->{ 'objectType' }; }

    #---- Evaluate first and last object of eyecatcher
    # Eyecatcher is made of multiple objects (array reference) or of a single object
    my $bMultiTmp      = ( lc( ref( $refECTmp ) ) eq "array" );
    my @arrECTmp       = ( $bMultiTmp ) ? @{ $refECTmp } : ( $refECTmp );
    my $hrefFirstECTmp = $arrECTmp[ 0 ]->{ 'POMOBJ' };
    my $hrefLastECTmp  = $arrECTmp[ $#arrECTmp ]->{ 'POMOBJ' };

    my $iFirstIdxTmp = $hrefFirstECTmp->{ $a2w::core::dm::Constants::AT_SEARCH_INDEX };
    my $fFirstXTmp   = $hrefFirstECTmp->{ $a2w::core::dm::Constants::AT_XPOS };
    my $fFirstYTmp   = $hrefFirstECTmp->{ $a2w::core::dm::Constants::AT_ADJ_YPOS };
    my $iLastIdxTmp  = $hrefLastECTmp->{ $a2w::core::dm::Constants::AT_SEARCH_INDEX };
    my $fLastXTmp    = $hrefLastECTmp->{ $a2w::core::dm::Constants::AT_XPOS };
    my $fLastYTmp    = $hrefLastECTmp->{ $a2w::core::dm::Constants::AT_ADJ_YPOS };

    #---- Restrict value search based on direction
    if ( $iDirectionTmp == $a2w::core::dm::Constants::EYECATCHER_SEARCH_DIR_RIGHT ){
        #---- Start searching value after last object of eyecatcher
        $hrefOptionsTmp->{ 'start' } = $iLastIdxTmp + 1;
        $hrefOptionsTmp->{ 'xRange' }{ 'from' } = $fLastXTmp + $fXTolTmp;
        $hrefOptionsTmp->{ 'yRange' }{ 'from' } = $fLastYTmp;

        # NOTE(review): for an eyecatcher made of a single object the Y tolerance is
        # not applied (kept as it was), all other cases apply it - confirm the intent
        $hrefOptionsTmp->{ 'yRange' }{ 'to' } = ( $bMultiTmp ) ? $fLastYTmp + $fYTolTmp : $fLastYTmp;
    }
    elsif ( $iDirectionTmp == $a2w::core::dm::Constants::EYECATCHER_SEARCH_DIR_LEFT ){
        #---- Start searching value before first object of eyecatcher
        $hrefOptionsTmp->{ 'end' } = $iFirstIdxTmp - 1;
        $hrefOptionsTmp->{ 'xRange' }{ 'to' } = $fFirstXTmp - $fXTolTmp;
        $hrefOptionsTmp->{ 'yRange' }{ 'from' } = $fFirstYTmp;
        $hrefOptionsTmp->{ 'yRange' }{ 'to' } = $fFirstYTmp + $fYTolTmp;
    }
    elsif ( $iDirectionTmp == $a2w::core::dm::Constants::EYECATCHER_SEARCH_DIR_TOP ){
        #---- Start searching value above eyecatcher
        $hrefOptionsTmp->{ 'end' } = $iFirstIdxTmp - 1;
        $hrefOptionsTmp->{ 'xRange' }{ 'from' } = $fFirstXTmp - $fXTolTmp;
        $hrefOptionsTmp->{ 'yRange' }{ 'from' } = $fFirstYTmp - $fYTolTmp;
        $hrefOptionsTmp->{ 'yRange' }{ 'to' } = $fFirstYTmp - 1;

        # Right limit is applied only when eyecatcher is made of multiple objects
        if ( $bMultiTmp ){ $hrefOptionsTmp->{ 'xRange' }{ 'to' } = $fLastXTmp + $fXTolTmp; }
    }
    elsif ( $iDirectionTmp == $a2w::core::dm::Constants::EYECATCHER_SEARCH_DIR_BOTTOM ){
        #---- Start searching value below eyecatcher
        $hrefOptionsTmp->{ 'start' } = $iFirstIdxTmp + 1;
        $hrefOptionsTmp->{ 'xRange' }{ 'from' } = $fFirstXTmp - $fXTolTmp;
        $hrefOptionsTmp->{ 'yRange' }{ 'from' } = $fFirstYTmp + 1;
        $hrefOptionsTmp->{ 'yRange' }{ 'to' } = $fFirstYTmp + $fYTolTmp;

        # Right limit is applied only when eyecatcher is made of multiple objects
        if ( $bMultiTmp ){ $hrefOptionsTmp->{ 'xRange' }{ 'to' } = $fLastXTmp + $fXTolTmp; }
    }

    #---- Find the eyecatcher value
    my $refECValueTmp = a2w::core::dm::MiningUtils::findEyecatcher( $arefContentPar, $hrefOptionsTmp );

    return $refECValueTmp;
}

#-----------------------------------------------------------------------
# Don't remove the following lines !!!
#-----------------------------------------------------------------------
1;
__END__
