#-------------------------------------------------------------------------------
#  sortPageTextObjs.pm: Scripting Facility to sort Page Text Objects
#
#
#  Call:
#
#  On Windows:	afp2web.exe -q -c -doc_cold -sp:sortPageTextObjs.pm samples\insure.afp
#
#  On Unix:	./afp2web   -q -c -doc_cold -sp:sortPageTextObjs.pm samples/insure.afp
#
#  Author  : Fa. Maas
#  Date    : 2008-07-10
#  Version : 1.0.1
#
#  $V100   2005-08-01	Initial Release
#
#  $V101   2008-07-10	Extended to add script file path as module search path
#                       on "BEGIN" block
#-------------------------------------------------------------------------------

# $V101 Begin
#-----------------------------------------------------------------------
# BEGIN block of module
#
# Prepends the directory of this script module to the Perl module search
# path (@INC) so that modules located beside the script can be found
#-----------------------------------------------------------------------
BEGIN {
    #---- Purpose: prepend the directory part of the script filename ($0)
    #---- to @INC; falls back to "." when no directory part is found

    #---- Fetch script filename
    my $sScriptFilenameTmp = defined( $0 ) ? $0 : "";

    #---- Extract script file path from script filename.
    #---- Both "/" and "\" are accepted as directory separator, because the
    #---- script is run on Unix and on Windows (see the calls in the file
    #---- header) and a Windows path may be given with backslashes
    my $sScriptFilePathTmp = "";
    if ( $sScriptFilenameTmp =~ /(.*)[\\\/].*\.pm/ ){
        $sScriptFilePathTmp = $1;
    }

    #printf ( "Script filename: " . $0 . " Script filepath: " . $sScriptFilePathTmp . "\n" );

    #---- No directory part found: use the current directory
    if ( $sScriptFilePathTmp eq "" ){
        $sScriptFilePathTmp = ".";
    }

    #---- Add script file path to module search path (first position, so it
    #---- takes precedence over the already configured directories)
    unshift( @INC, $sScriptFilePathTmp );
}
# $V101 End

use a2w::Config;
use a2w::Document;
use a2w::Font;
use a2w::Index;
use a2w::Kernel;
use a2w::Line;
use a2w::MediumMap;
use a2w::NOP;
use a2w::Overlay;
use a2w::Page;
use a2w::PSEG;
use a2w::Text;
#-----------------------------------------------------------------------
# Initialize once per process
#-----------------------------------------------------------------------
sub initialize {

    #---- Called once per process.
    #---- Stores the configuration and kernel objects plus the paths read
    #---- from them in package variables, evaluates the logging level and
    #---- opens the dump file "<OutputFilePath><spool file basename>.txt"
    #---- on the package filehandle fDumpFile.
    #----
    #---- Declared without prototype: the sub receives two arguments, an
    #---- empty "()" prototype would reject Perl-level calls passing them.
    #----
    #---- Parameters: a2w::Config, a2w::Kernel
    #---- Returns   : 0 on success, -1 if the dump file could not be opened
    #---- NOTE(review): assumes the a2w kernel treats a negative return value
    #---- of initialize() as an error, like documented for afp2web() - confirm

    #---- Get Parameter of initialize( Par: a2w::Config, a2w::Kernel )
    ( $a2wConfigPar, $a2wKernelPar ) = @_;

    #---- Define boolean values
    $TRUE  = 1;    # TRUE  boolean value
    $FALSE = 0;    # FALSE boolean value

    #---- Set/Reset Logging: switched on when the logging level contains
    #---- "sf" (case-insensitive); an undefined level keeps logging off
    $bLog = $FALSE;
    my $svLoggingLevelTmp = $a2wConfigPar->getAttribute("LoggingLevel");
    if ( defined( $svLoggingLevelTmp ) && index( lc( $svLoggingLevelTmp ), "sf" ) >= 0 ){
        $bLog = $TRUE;
    }

    my $svScriptProcTmp = $a2wConfigPar->getAttribute("ScriptProcedure");
    my $svScriptArgsTmp = $a2wConfigPar->getScriptArgs();
    $svIndexFilePath    = $a2wConfigPar->getIndexFilePath();
    $svOutputFilePath   = $a2wConfigPar->getOutputFilePath();
    $svSpoolFilename    = $a2wKernelPar->getSpoolFilename();

    if ( $bLog == $TRUE ){
        print "Running $svScriptProcTmp...\n";
        print "initialize(): Processing $svSpoolFilename\n";
        print "initialize(): Args: $svScriptArgsTmp, IndexFilePath: $svIndexFilePath, OutputFilePath: $svOutputFilePath\n";
    }

    #---- Build dump filename: drop everything up to the last ":", "\" or "/"
    #---- of the spool filename, prepend the output path and append ".txt"
    my ( $svDumpFilenameTmp ) = ( $svSpoolFilename =~ /^(?:.*[:\\\/])?(.*)/s );
    $svDumpFilenameTmp = $svOutputFilePath . $svDumpFilenameTmp . ".txt";

    #---- Open Dump file
    #---- Three-argument open, so that characters of the filename are never
    #---- interpreted as open mode; a failure is reported instead of ignored
    if ( !open( fDumpFile, ">", $svDumpFilenameTmp ) ){
        print STDERR "initialize(): Unable to open dump file $svDumpFilenameTmp: $!\n";
        return -1;
    }
    print "Running $svScriptProcTmp: Dumping to $svDumpFilenameTmp...\n";
    return 0;
}

#-----------------------------------------------------------------------
# InitializeDoc for each document
#-----------------------------------------------------------------------
sub initializeDoc {

    #---- Called for each document.
    #---- Remembers the passed document object in the package variable
    #---- $a2wDocumentPar and prints its id when logging is switched on.
    #----
    #---- Declared without prototype: the sub receives one argument, an
    #---- empty "()" prototype would reject Perl-level calls passing it.
    #----
    #---- Parameter: a2w::Document
    #---- Returns  : 0

    #---- Get Parameter of initializeDoc( Par: a2w::Document )
    ( $a2wDocumentPar ) = @_;

    if ( $bLog == $TRUE ){
        print "initializeDoc(): DocId " . $a2wDocumentPar->getId() . "\n";
    }
    return 0;
}

#-----------------------------------------------------------------------
# InitializePage for each page
#-----------------------------------------------------------------------
sub initializePage {

    #---- Called for each page.
    #---- Remembers the passed page object in the package variable
    #---- $a2wPagePar, which afp2web() reads to process the page.
    #----
    #---- Declared without prototype: the sub receives one argument, an
    #---- empty "()" prototype would reject Perl-level calls passing it.
    #----
    #---- Parameter: a2w::Page
    #---- Returns  : 0

    #---- Get Parameter of initializePage( Par: a2w::Page )
    ( $a2wPagePar ) = @_;

    if ( $bLog == $TRUE ){
        print "initializePage()\n";
    }
    return 0;
}

#-----------------------------------------------------------------------
# Main entry method
# Return values:
#  	  < 0:	error
# 		0:	append page to Current Document
# 		1:	skip page
# 		2:	first page / new document
#-----------------------------------------------------------------------
sub afp2web(){

    #---- Page handler.
    #---- Writes the page banner to the dump file, collects position and
    #---- content of every text object of the current page ($a2wPagePar)
    #---- in @unsortedObjList and calls buildLines() to build/dump the lines.
    #---- Always returns $APPEND (0).

    print "afp2web(): PageId " . $a2wPagePar->getParseId() . "\n" if ( $bLog == $TRUE );

    #---- Return codes: append page to current document / skip page /
    #---- new document
    ( $APPEND, $SKIP, $NEWDOC ) = ( 0, 1, 2 );

    #---- Default result: append page
    my $svResultTmp = $APPEND;

    #---- Page banner
    my $svParseIdTmp = $a2wPagePar->getParseId();
    printf fDumpFile ("================================= ");
    printf fDumpFile ("Page %06d", $svParseIdTmp);
    printf fDumpFile (" =================================\n");

    #---- Start with an empty object list for this page
    @unsortedObjList = ();

    #---- Walk through the text objects of the page; the loop ends when the
    #---- page object hands back a value that compares numerically equal to 0
    for ( my $a2wCurTextTmp = $a2wPagePar->getFirstText();
          $a2wCurTextTmp != 0;
          $a2wCurTextTmp = $a2wPagePar->getNextText() ){

        my $svContentTmp = $a2wCurTextTmp->getText();
        my $svXTmp       = $a2wCurTextTmp->getXPos();
        my $svYTmp       = $a2wCurTextTmp->getYPos();

        print "    @(" . $svXTmp . "," . $svYTmp . ")>" . $svContentTmp . "<\n" if ( $bLog == $TRUE );

        #---- Append object to the unsorted list
        push( @unsortedObjList, { XPOS => $svXTmp,
                                  YPOS => $svYTmp,
                                  TEXT => $svContentTmp } );
    }

    #---- Build/Dump Lines
    buildLines();

    return $svResultTmp;
}

#-----------------------------------------------------------------------
# FinalizePage for each page
#-----------------------------------------------------------------------
sub finalizePage(){

    #---- Called for each page; only traces the call when logging is
    #---- switched on. Returns 0.
    print "finalizePage()\n" if ( $bLog == $TRUE );

    return 0;
}

#-----------------------------------------------------------------------
# FinalizeDoc for each document
#-----------------------------------------------------------------------
sub finalizeDoc(){

    #---- Called for each document; nothing to do unless logging is on
    return 0 unless ( $bLog == $TRUE );

    #---- Trace the call
    print "finalizeDoc()\n";
    return 0;
}

#-----------------------------------------------------------------------
# Finalize once per process
#-----------------------------------------------------------------------
sub finalize(){

    #---- Called once per process.
    #---- Traces the call when logging is switched on and closes the dump
    #---- file (package filehandle fDumpFile).
    #----
    #---- Returns: 0 on success, -1 if closing the dump file failed
    #---- NOTE(review): assumes the a2w kernel treats a negative return value
    #---- of finalize() as an error, like documented for afp2web() - confirm

    if ( $bLog == $TRUE ){
        print "finalize()\n";
    }

    #---- Buffered output is flushed on close, so a write error (e.g. disk
    #---- full) may only show up here; report it instead of ignoring it
    if ( !close( fDumpFile ) ){
        print STDERR "finalize(): Unable to close dump file: $!\n";
        return -1;
    }
    return 0;
}

#-----------------------------------------------------------------------
# Sorting Algorithm
#
#	first Key is YPOS, second Key is XPOS
#-----------------------------------------------------------------------
sub complex_arrays{

    #---- Sort comparator ($a/$b are hash references): order by YPOS and,
    #---- for equal YPOS, by XPOS; both compared numerically
    my $iYCmpTmp = ( $a->{YPOS} <=> $b->{YPOS} );
    return $iYCmpTmp if ( $iYCmpTmp );

    return ( $a->{XPOS} <=> $b->{XPOS} );
}

#-----------------------------------------------------------------------
# Build Lines
#-----------------------------------------------------------------------
sub buildLines {

    #---- Builds text lines from the objects in @unsortedObjList and writes
    #---- them to the dump file (package filehandle fDumpFile).
    #----
    #---- The objects are sorted with complex_arrays (YPOS, then XPOS); the
    #---- TEXT values of all objects sharing the same YPOS are concatenated
    #---- to one line, which is written as "<6-digit line number>:<text>".
    #---- Line numbering starts at 0 on each call.
    #----
    #---- Parameters: none (reads the package variable @unsortedObjList)
    #---- Returns   : nothing

    #---- Define temp variables
    my $LineTmp       = "";
    my $LineCountTmp  = 0;
    my $currYPosTmp   = 0;
    my $bLineOpenTmp  = 0;    # becomes true once the first object was seen

    #---- Sort Array: first Key is YPOS, second Key is XPOS
    my @sortedObjList = sort complex_arrays @unsortedObjList;

    #---- Build Lines based on sorted Array
    foreach my $hrefObjTmp ( @sortedObjList ){

        my $YPosTmp = $hrefObjTmp->{YPOS};

        #---- A separate flag marks "no line started yet", so that a line
        #---- positioned at YPOS 0 is not merged with the line following it
        if ( $bLineOpenTmp && $currYPosTmp != $YPosTmp ){

            #---- YPOS changed: write the finished line, start a new one
            printf fDumpFile ( "%06d:%s\n", $LineCountTmp, $LineTmp );
            $LineCountTmp++;
            $LineTmp = "";
        }

        #---- Append Text to current line
        $LineTmp      = $LineTmp . $hrefObjTmp->{TEXT};
        $currYPosTmp  = $YPosTmp;
        $bLineOpenTmp = 1;
    }

    #---- Write the last line; the loop only writes a line when the next
    #---- YPOS is seen, so without this the final line would be lost
    if ( $bLineOpenTmp ){
        printf fDumpFile ( "%06d:%s\n", $LineCountTmp, $LineTmp );
    }
    return;
}

__END__
