#-------------------------------------------------------------------------------
#  afpidx2xml-generator.pm:
#
#  - Scripting Facility to create XML output from AFP indexes
#  - Module reads in XML template based on the index "LISTENTYP(JOBNAME)"
#  - Fills in the XML template with actual values from the AFP indexes
#  - Stores the updated XML in file
#
#  Call:
#
#  On Windows:
#            afp2web.exe -q -c -doc_cold -sp:afpidx2xml-generator.pm samples\insure.afp
#
#  On Unix:
#            ./afp2web   -q -c -doc_cold -sp:afpidx2xml-generator.pm samples/insure.afp
# 
#  Author  : Fa. Maas
#
#  V100   2025-01-27    AFP-1210: HSBC: Initial Release
#
#-------------------------------------------------------------------------------

#-----------------------------------------------------------------------
# BEGIN block of module
#
# Extends PERL module search path array (@INC) with new element having
# this script modules path in order to have better module portability
#-----------------------------------------------------------------------
BEGIN {
    #---- Derive the directory of this script module from the script filename ($0);
    #     stays empty when $0 carries no "<dir>/<name>.pm" part
    my $sModuleDirTmp = ( $0 =~ /(.*)\/.*\.pm/ ) ? $1 : "";

    #printf STDERR ( "Script filename: " . $0 . " Script filepath: " . $sModuleDirTmp . "\n" );

    if ( $sModuleDirTmp eq "" ){
        #---- No directory part found, fall back to the current directory
        $sModuleDirTmp = ".";
    }
    elsif ( $sModuleDirTmp =~ /(.*)\/sfsamples/ && $1 ne "" ){
        #---- Script lives below a "sfsamples" directory, add its parent path to module search path
        unshift( @INC, $1 );
    }

    #---- Add script file path and its perl library directories to module search path
    #     (resulting search order: <dir>/perl/lib, <dir>/perl, <dir>)
    unshift( @INC, $sModuleDirTmp . '/perl/lib', $sModuleDirTmp . '/perl', $sModuleDirTmp );
}

use a2w::Config;
use a2w::Document;
use a2w::Font;
use a2w::Index;
use a2w::Kernel;
use a2w::Line;
use a2w::MediumMap;
use a2w::NOP;
use a2w::Overlay;
use a2w::Page;
use a2w::PSEG;
use a2w::Text;

use a2w::ConfigConstants;
use a2w::DocumentConstants;
use a2w::PageConstants;
use a2w::FontConstants;

use customers::hsbc::jobinfo;

#-----------------------------------------------------------------------
# Initialize once per process
#-----------------------------------------------------------------------
sub initialize(){

    # Sets up the process wide state (package globals) used by all other
    # callbacks and loads + splits the XML template once.
    #
    # Returns
    # - 0 on success
    # - ( <negative return code>, <error message> ) when the template could
    #   not be loaded (-101, -102) or split (-103)

    #---- Get Parameter of initialize( Par: a2w::Config, a2w::Kernel )
    ( $a2wConfigPar, $a2wKernelPar ) = @_;

    #---- Define boolean values
    $TRUE  = 1;    # TRUE  boolean value
    $FALSE = 0;    # FALSE boolean value

    #---- Set/Reset Logging (enabled when the logging level contains "sf")
    $bLog = $FALSE;
    if (index( lc($a2wConfigPar->getAttribute( $a2w::ConfigConstants::LOGGINGLEVEL )), "sf") >= 0 ){
        $bLog = $TRUE;
    }

    my $sScriptProcTmp = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::SCRIPTPROCEDURE );
    my $sScriptArgsTmp = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::SCRIPTARGUMENT );
    $sConfigFilePath   = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::INIPATH );
    $sIndexFilePath    = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::INDEXPATH );
    $sOutputFilePath   = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::OUTPUTFILEPATH );
    $sSpoolType        = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::INPUTFORMAT );
    $sOutputType       = $a2wConfigPar->getAttribute( $a2w::ConfigConstants::OUTPUTFORMAT );
    $sSpoolFilename    = $a2wKernelPar->getSpoolFilename();
    $sAFP2webVersion   = $a2wKernelPar->getVersion();

    $sScriptFilename   = $0;
    $sScriptBasename   = $sScriptFilename;
    $sScriptBasename   =~ s/.*\/(.*)$/$1/;

    #---- Template location: <INI path>XMLTemplate/indexes.xml
    # NOTE(review): the INI path is concatenated as is, so it presumably ends
    #               with a path separator - confirm against the kernel
    $sTemplatePath     = $sConfigFilePath . "XMLTemplate/";
    $sTemplateName     = "indexes";
    $sTemplateExt      = ".xml";

    if ( $bLog == $TRUE ){
        # print (not printf): the interpolated values must not be treated as format directives
        print STDERR ( "Running $sScriptProcTmp...\n" );
        print STDERR ( "initialize(): Processing $sSpoolFilename\n" );
        print STDERR ( "initialize(): Args: $sScriptArgsTmp, INIFilePath: $sConfigFilePath, IndexFilePath: $sIndexFilePath, OutputFilePath: $sOutputFilePath\n" );
        print STDERR ( "initialize(): Template path: $sTemplatePath\n" );
    }

    #---- Get job to document indexes mapping table
    $hrefJob2DocMapTable = \%customers::hsbc::jobinfo::job2docinfo;

    #---- Page process flags
    $APPEND = 0;    # append page to Current Document
    $SKIP   = 1;    # skip page
    $NEWDOC = 2;    # new document

    #---- Reset Page Id
    $iPageId = 0;

    #---- Set AutoSplit on
    $a2wConfigPar->setAttribute( $a2w::ConfigConstants::AUTOSPLIT, "on" );

    #---- afpidx2xml-generator.pm Version
    $sVersion = "v1.0.0";

    #---- Load the template
    # On success the second element is the array reference of template lines,
    # on failure it is the error message
    my ($iLRcTmp, $sMsgTmp) = _loadTemplate( $sTemplateName );
    if ( $iLRcTmp < 0 ){ return ($iLRcTmp, $sMsgTmp); }
    $arefTplContent = $sMsgTmp;

    #---- Split template content
    $hrefTplSplitCont = _splitTemplateContent( $arefTplContent );
    if ( ref( $hrefTplSplitCont ) ne 'HASH' ){
        return ( -103, "Unable to split the template content" );
    }

    return 0;
}

#-----------------------------------------------------------------------
# InitializeDoc for each document
#-----------------------------------------------------------------------
sub initializeDoc(){

    #---- Remember the document handed in ( Par: a2w::Document ),
    #     finalizeDoc() reads it from this package variable later
    $a2wDocumentPar = $_[ 0 ];

    #---- Fetch DocId (used for tracing only)
    my $sDocIdTmp = $a2wDocumentPar->getId();
    printf STDERR ( "initializeDoc(): DocId $sDocIdTmp\n" ) if ( $bLog == $TRUE );

    #---- A new document starts, so the page counter starts over
    $iPageId = 0;

    return 0;
}

#-----------------------------------------------------------------------
# InitializePage for each page
#-----------------------------------------------------------------------
sub initializePage(){

    #---- Remember the page handed in ( Par: a2w::Page ),
    #     afp2web() reads it from this package variable when tracing
    $a2wPagePar = $_[ 0 ];

    printf STDERR ( "initializePage()\n" ) if ( $bLog == $TRUE );

    return 0;
}

#-----------------------------------------------------------------------
# Main entry method
# Return values:
#        < 0:    error
#         0:    append page to Current Document
#         1:    skip page
#         2:    first page / new document
#-----------------------------------------------------------------------
sub afp2web(){

    #---- Trace the parse id of the current page when script logging is on
    if ( $bLog == $TRUE ){
        printf STDERR ( "afp2web(): PageId " . $a2wPagePar->getParseId() . "\n" );
    }

    #---- Count the page
    $iPageId++;

    #---- Every page is appended to the current document
    return $APPEND;
}

#-----------------------------------------------------------------------
# FinalizePage for each page
#-----------------------------------------------------------------------
sub finalizePage(){

    #---- Nothing to do per page except tracing the call
    printf STDERR ( "finalizePage()\n" ) if ( $bLog == $TRUE );

    return 0;
}

#-----------------------------------------------------------------------
# FinalizeDoc for each document
#-----------------------------------------------------------------------
sub finalizeDoc(){

    # Generates the XML index file for the document stored by initializeDoc()
    # (package variable $a2wDocumentPar).
    #
    # Returns
    # - 0 on success and also when XML generation is skipped (the reason is
    #   written to STDERR)
    # - ( <negative return code>, <error message> ) when the template could
    #   not be filled (-104) or the XML file could not be written

    # NOTE: print (not printf) is used for all messages, the interpolated
    #       values must not be treated as format directives

    if ( $bLog == $TRUE ){ print STDERR ( "finalizeDoc()\n" ); }

    #---- Get document indexes
    my $hrefDocObjectTmp = _getDocumentIndexes( $a2wDocumentPar );
    if ( ref( $hrefDocObjectTmp ) ne 'HASH' ){
        print STDERR ( "No indexes found in document: " . $a2wDocumentPar->getId() . ", skipped generating XML\n" );
        return 0;
    }
    if ( ref( $hrefDocObjectTmp->{ "pages" } ) ne 'ARRAY' ){
        print STDERR ( "No page found in document: " . $a2wDocumentPar->getId() . ", skipped generating XML\n" );
        return 0;
    }

    # The first slot is empty when the first page carries no index
    my $hrefPg1Tmp = $hrefDocObjectTmp->{ "pages" }[ 0 ];
    if (    ref( $hrefPg1Tmp ) ne 'HASH'
         || ref( $hrefPg1Tmp->{ "indexes" } ) ne 'HASH'
       ){
        print STDERR ( "No indexes found in first page of document: " . $a2wDocumentPar->getId() . ", skipped generating XML\n" );
        return 0;
    }

    #---- Update document with output file info
    my $sOutFilenameTmp = $sOutputFilePath . $a2wDocumentPar->getOutputFilename();
    my $sXMLFilenameTmp = $sOutFilenameTmp;
    $sXMLFilenameTmp =~ s/\.(.{3,4})$/.xml/;

    # Never write the XML over the generated output file: when the extension
    # could not be replaced (not 3-4 characters long) or the output itself
    # ends with ".xml", append the XML extension instead
    if ( $sXMLFilenameTmp eq $sOutFilenameTmp ){ $sXMLFilenameTmp .= ".xml"; }

    $hrefDocObjectTmp->{ "type" } = lc( $sOutputType );
    $hrefDocObjectTmp->{ "file" } = $sOutFilenameTmp;
    $hrefDocObjectTmp->{ "filesize" } = $a2wDocumentPar->getSize();
    $hrefDocObjectTmp->{ "pagecount" } = $a2wDocumentPar->getPageCount();

    #---- Get the job name
    # NOTE:
    # LISTENTYP(JOBNAME) index on first page provides the job name, which
    # selects the document type/class entry of the job mapping table
    #
    my $sJobIdxValueTmp = $hrefPg1Tmp->{ "indexes" }{ "LISTENTYP(JOBNAME)" };
    if ( !defined( $sJobIdxValueTmp ) || $sJobIdxValueTmp eq "" ){
        print STDERR ( "Job name index: LISTENTYP(JOBNAME) not found in first page of document: " . $a2wDocumentPar->getId() . ", skipped generating XML\n" );
        return 0;
    }

    # The index value may repeat the index name ("LISTENTYP(JOBNAME)=<job>"), keep the job part only
    my $sJobNameTmp = $sJobIdxValueTmp;
    if ( $sJobNameTmp =~ /^LISTENTYP\(JOBNAME\)=(.*)$/ ){ $sJobNameTmp = $1; }
    if ( $sJobNameTmp eq "" ){
        print STDERR ( "Job name could not be extracted from the index: LISTENTYP(JOBNAME) value: " . $sJobIdxValueTmp . ", skipped generating XML\n" );
        return 0;
    }
    $hrefDocObjectTmp->{ "jobname" } = $sJobNameTmp;
    if ( $bLog == $TRUE ){ print STDERR ( "Job name:>" . $sJobNameTmp . "<\n" ); }

    #---- Fill in template
    # Anything but a hash reference (undef or an error message) is a failure
    my $hrefFilledContTmp = _fillTemplateContent( $hrefTplSplitCont, $hrefDocObjectTmp );
    if ( ref( $hrefFilledContTmp ) ne 'HASH' ){
        return ( -104, "Unable to fill the template for the job:>" . $sJobNameTmp . "<" );
    }

    #---- Generate XML with filled content
    my ($iGRcTmp, $sResultTmp) = _generateXML( $sXMLFilenameTmp, $hrefFilledContTmp );
    if ( $iGRcTmp < 0 ){ return ($iGRcTmp, $sResultTmp); }

    return 0;
}

#-----------------------------------------------------------------------
# Finalize once per process
#-----------------------------------------------------------------------
sub finalize(){

    #---- Nothing to clean up at process end, only trace the call
    printf STDERR ( "finalize()\n" ) if ( $bLog == $TRUE );

    return 0;
}

#-----------------------------------------------------------------------
# Get document indexes (both document and page level indexes)
#
# Returns
# {
#     "indexes": {
#         <document index name>: <document index value>
#     }
#     ,
#     "pages": [
#         {
#             "id": <Page number>,
#             "indexes": {
#                 <page index name>: <page index value>
#             }
#         }
#         , {
#             "id": <Page number>,
#             "indexes": {
#                 <page index name>: <page index value>
#             }
#         }
#     ]
# }
#
#-----------------------------------------------------------------------
sub _getDocumentIndexes(){

    #---- Get parameter
    #
    # 1. Document (object iterated through getFirstIndex/getNextIndex and
    #    getFirstPage/getNextPage; indexes provide getName/getValue)
    #
    # Returns a hash reference with the collected indexes, or undef when the
    # document is undefined or neither the document nor any page has a named index.
    #
    # NOTE: "pages" is positional: slot <n> belongs to page <n + 1>. Pages
    #       without named indexes leave an undefined slot, so slot 0 is
    #       always the first page of the document.
    #
    my $a2wDocPar = shift;

    # print (not printf) is used for logging, index names and values must
    # not be treated as format directives
    if ( $bLog == $TRUE ){ print STDERR ( "_getDocumentIndexes()\n" ); }

    #---- Assert parameter
    if ( !defined( $a2wDocPar ) ) { return undef; }

    #---- Document indexes object
    my $hrefResultTmp = {};
    my $iDocIdxCountTmp = 0;
    my $iAllPgIdxCountTmp = 0;
    my $hrefDocIdxesTmp = {};

    #---- Fetch document indexes ----#
    # The iteration ends as soon as the getter returns 0
    my $a2wIndexTmp = $a2wDocPar->getFirstIndex();
    while ( $a2wIndexTmp != 0 ){
        my $sNameTmp = $a2wIndexTmp->getName();
        if ( $sNameTmp ne "" ){
            $hrefDocIdxesTmp->{ $sNameTmp } = $a2wIndexTmp->getValue();
            if ( $bLog == $TRUE ){ print STDERR ( "DocIndex: " . $sNameTmp . "=>" . $hrefDocIdxesTmp->{ $sNameTmp } . "<\n" ); }
            $iDocIdxCountTmp++;
        }

        #---- Get next document index
        $a2wIndexTmp = $a2wDocPar->getNextIndex();
    }
    if ( $iDocIdxCountTmp > 0 ){ $hrefResultTmp->{"indexes"} = $hrefDocIdxesTmp; }

    #---- Fetch page indexes ----#
    my $iPgIdxTmp = -1;    # zero based page position
    my $a2wPageTmp = $a2wDocPar->getFirstPage();
    while ( $a2wPageTmp != 0 ){
        my $iPgIdxCountTmp = 0;    # named indexes of the current page
        my $hrefPgIdxesTmp = {};

        $iPgIdxTmp++;

        #---- Get first index of page
        my $iIdxCntTmp = 0;        # all indexes of the current page (named or not)
        $a2wIndexTmp = $a2wPageTmp->getFirstIndex();
        while ( $a2wIndexTmp != 0 ){
            $iIdxCntTmp++;
            my $sNameTmp = $a2wIndexTmp->getName();
            if ( $sNameTmp ne "" ){
                $hrefPgIdxesTmp->{ $sNameTmp } = $a2wIndexTmp->getValue();
                if ( $bLog == $TRUE ){
                    print STDERR (   sprintf( "Page %03d Index %03d: ", $iPgIdxTmp + 1, $iIdxCntTmp )
                                   . $sNameTmp . "=>" . $hrefPgIdxesTmp->{ $sNameTmp }
                                   . "<\n"
                                 );
                }
                $iPgIdxCountTmp++;
            }

            #---- Get next page index
            $a2wIndexTmp = $a2wPageTmp->getNextIndex();
        }
        if ( $iPgIdxCountTmp > 0 ){
            if ( !defined( $hrefResultTmp->{"pages"} ) ){ $hrefResultTmp->{"pages"} = []; }
            $hrefResultTmp->{"pages"}->[$iPgIdxTmp] = { "id" => ( $iPgIdxTmp + 1 ), "indexes" => $hrefPgIdxesTmp };
            $iAllPgIdxCountTmp += $iPgIdxCountTmp;
        }

        #---- Get next page
        $a2wPageTmp = $a2wDocPar->getNextPage();
    }

    return ( $iDocIdxCountTmp > 0 || $iAllPgIdxCountTmp > 0 ) ? $hrefResultTmp : undef;
}

#-----------------------------------------------------------------------
# Load template
#
# Check and load the XML template based on given name
# - Searches the template file in template path (<INI file path>XMLTemplate/<name>.xml)
# - If found, loads the content as array of string (each line is one string)
#
# Returns
# Array with two elements
# - First element gives the return code (>= 0 means success else error)
# - Second element gives either the error message or an array reference holding the template content
#
#-----------------------------------------------------------------------
sub _loadTemplate(){

    #---- Get parameter
    #
    # 1. Template name (of type string), file name without path and extension
    #
    # The file name is built from the package variables $sTemplatePath and
    # $sTemplateExt: <template path><name><template extension>
    #
    # Returns
    # - ( 0, <array reference of template lines without line ends> ) on success
    # - ( -101, <error message> ) when the file can not be opened
    # - ( -102, <error message> ) when the file is empty
    #
    my $sTplNamePar = shift;

    # print (not printf): the name must not be treated as format directive
    if ( $bLog == $TRUE ){ print STDERR ( "_loadTemplate(" . $sTplNamePar . ")\n" ); }

    #---- Evaluate template filename
    my $sTplFilenameTmp = $sTemplatePath . $sTplNamePar . $sTemplateExt;

    #---- Read template file content
    # Three argument open on a lexical handle: the filename is never
    # interpreted as open mode and the handle can not clash with other code
    my $fhTplTmp;
    my $iRcTmp = open( $fhTplTmp, '<', $sTplFilenameTmp ) ? 1 : 0;
    if ( $iRcTmp == 0 ){
        return (-101, "Unable to open template file $sTplFilenameTmp. rc=$iRcTmp reason=" . $!);
    }
    my @arrTPLTmp = <$fhTplTmp>;
    close( $fhTplTmp );

    if ( @arrTPLTmp == 0 ){
        return (-102, "Empty content from template file $sTplFilenameTmp");
    }
    chomp( @arrTPLTmp );

    return (0, \@arrTPLTmp);
}

#-----------------------------------------------------------------------
# Split template content
#
# Splits template lines as explained below
# - Line with "<PageIndex" text marks the start of page content, hence all lines above will be pre document lines
# - Line with "</PageIndex" text marks the end of page content, hence all lines below will be post document lines
# - Lines from "<PageIndex" to "</PageIndex" marks the page content
#
# Returns
# A hash reference: {
#     "predoc":  <Array reference of pre document lines>,
#     "postdoc": <Array reference of post document lines>,
#     "page":    <Array reference of page content lines>
# }
#
#-----------------------------------------------------------------------
sub _splitTemplateContent(){

    #---- Get parameter
    #
    # 1. Template content (of type array reference, one template line per element)
    #
    my ( $arefLinesPar ) = @_;

    printf STDERR ( "_splitTemplateContent()\n" ) if ( $bLog == $TRUE );

    #---- Buckets collecting the template lines per section
    my %hshSectionsTmp = ( "predoc" => [], "page" => [], "postdoc" => [] );

    my $bSeenStartTmp = $FALSE;    # a "<PageIndex" line has been passed
    my $bSeenEndTmp   = $FALSE;    # a "</PageIndex" line has been passed

    foreach my $sLineTmp ( @{ $arefLinesPar } ){
        my $sSectionTmp;

        if ( $sLineTmp =~ /\<PageIndex/ ){
            #---- Start marker line belongs to the page content
            $bSeenStartTmp = $TRUE;
            $sSectionTmp = "page";
        }
        elsif ( $sLineTmp =~ /\<\/PageIndex/ ){
            #---- End marker line belongs to the page content, too
            $bSeenEndTmp = $TRUE;
            $sSectionTmp = "page";
        }
        elsif ( $bSeenStartTmp == $FALSE ){
            $sSectionTmp = "predoc";
        }
        elsif ( $bSeenEndTmp == $FALSE ){
            $sSectionTmp = "page";
        }
        else {
            $sSectionTmp = "postdoc";
        }

        push( @{ $hshSectionsTmp{ $sSectionTmp } }, $sLineTmp );
    }

    #---- Fill in split value, only sections having lines are returned
    my $hrefSplitTmp = {};
    foreach my $sKeyTmp ( "predoc", "page", "postdoc" ){
        if ( @{ $hshSectionsTmp{ $sKeyTmp } } > 0 ){
            $hrefSplitTmp->{ $sKeyTmp } = $hshSectionsTmp{ $sKeyTmp };
        }
    }

    return ( scalar( keys( %{ $hrefSplitTmp } ) ) > 0 ) ? $hrefSplitTmp : undef;
}

#-----------------------------------------------------------------------
# Fill template content
#
# - Takes in _splitTemplateContent return value as input
# - Iterates through each line of template and replaces the eyecatchers and indexes with actual values
# - Eyecatchers
# -- #OUTPUTTYPE#, output type. Example: pdf, png, txt
# -- #OUTPUTFILENAME#, output filename. Example: ./pdf/volker_ABERTR_sh6k.1.txt
# -- #OUTPUTPAGECOUNT#, output page count. Example: 4
# -- #OUTPUTSIZE#, output file size. Example: 5139
# -- #PAGENUMBER#, current page number. Example: 1
# - Indexes 
# -- Value as regular expression
# -- <Value>...pattern...</Value>
#
#-----------------------------------------------------------------------
sub _fillTemplateContent(){

    #---- Get parameter
    #
    # 1. Split template content (of type hash reference)
    #    {
    #        "predoc":  <Array reference of pre document lines>,
    #        "postdoc": <Array reference of post document lines>,
    #        "page":    <Array reference of page content lines>
    #    }
    # 2. Document object (of type hash reference)
    #    {
    #        "type":                        <Output file type, Ex: pdf>,
    #        "file":                        <Output filename>,
    #        "filesize":                    <Output file size>,
    #        "pagecount":                   <Output page count>,
    #        "jobname":                     <Job name (as given by the LISTENTYP(JOBNAME) index>,
    #        "indexes": {
    #            <document index name>:     <document index value>
    #        }
    #        ,
    #        "pages": [
    #            {
    #                "id": <Page number>,
    #                "indexes": {
    #                    <page index name>: <page index value>
    #                }
    #            }
    #            , {
    #                "id": <Page number>,
    #                "indexes": {
    #                    <page index name>: <page index value>
    #                }
    #            }
    #        ]
    #    }
    #    Undefined slots of "pages" (pages without indexes) are skipped.
    #
    # Returns
    # - { "content" => <Array reference of filled XML lines> } on success
    # - undef when a parameter is no hash reference or a <Value> pattern of
    #   the template is no valid regular expression (reason is written to STDERR)
    #
    # NOTE(review): values are inserted as they are, XML special characters
    #               ( & < > ) are not escaped - confirm whether the index
    #               values can contain them
    #
    my $hrefSplitTemplatePar = shift;
    my $hrefDocumentPar = shift;

    # print (not printf) is used for logging, index values and patterns must
    # not be treated as format directives
    if ( $bLog == $TRUE ){ print STDERR ( "_fillTemplateContent()\n" ); }

    #---- Assert parameters
    if ( ref( $hrefSplitTemplatePar ) ne 'HASH' ){
        print STDERR ( "Split template content parameter is undefined to fill in template\n" );
        return undef;
    }
    if ( ref( $hrefDocumentPar ) ne 'HASH' ){
        print STDERR ( "Document content parameter is undefined to fill in template\n" );
        return undef;
    }

    #---- Evaluate and fill in pre document content ----#
    # Build pre document eyecatcher values (undefined values are filled in as empty text)
    my %hshDocECsTmp = (
        "#OUTPUTTYPE#"      => $hrefDocumentPar->{ "type" }
      , "#OUTPUTFILENAME#"  => $hrefDocumentPar->{ "file" }
      , "#OUTPUTPAGECOUNT#" => $hrefDocumentPar->{ "pagecount" }
      , "#OUTPUTSIZE#"      => $hrefDocumentPar->{ "filesize" }
    );
    foreach my $sECTmp ( keys( %hshDocECsTmp ) ){
        if ( !defined( $hshDocECsTmp{ $sECTmp } ) ){ $hshDocECsTmp{ $sECTmp } = ""; }
    }
    my $sDocECsRegExTmp = join( "|", map { quotemeta( $_ ) } keys( %hshDocECsTmp ) );
    my $reDocECsTmp = qr/($sDocECsRegExTmp)/;

    #---- Job specific document type/class, taken from the job mapping table (may be missing)
    my $sJobNameTmp = $hrefDocumentPar->{ "jobname" };
    my $hrefDocInfoTmp = undef;
    if ( defined( $sJobNameTmp ) && ref( $hrefJob2DocMapTable ) eq 'HASH' ){
        $hrefDocInfoTmp = $hrefJob2DocMapTable->{ $sJobNameTmp };
    }

    #---- Process and fill in pre document content
    # All occurrences of all eyecatchers are replaced in one pass, so replaced
    # values are never scanned again. The pre document lines are always part
    # of the result (also when no eyecatcher was found), otherwise the post
    # document lines would close elements that were never opened.
    my @arrContentTmp = @{ $hrefSplitTemplatePar->{ "predoc" } || [] };
    foreach my $sLineTmp ( @arrContentTmp ){
        $sLineTmp =~ s/$reDocECsTmp/$hshDocECsTmp{$1}/g;
    }
    my $hrefFilledDocTmp = { "content" => \@arrContentTmp };

    #---- Evaluate and fill in page(s) content ----#
    my $arefPagesTmp = $hrefDocumentPar->{ "pages" };
    my @arrPageTplLinesTmp = @{ $hrefSplitTemplatePar->{ "page" } || [] };
    if ( ref( $arefPagesTmp ) eq 'ARRAY' ){
        foreach my $hrefPageTmp ( @{ $arefPagesTmp } ){
            #---- Skip pages without indexes (undefined slots of the positional page list)
            next if ( ref( $hrefPageTmp ) ne 'HASH' );

            #---- Fill in page eyecatcher values
            my $sPageNoTmp = defined( $hrefPageTmp->{ "id" } ) ? $hrefPageTmp->{ "id" } : "";
            my $hrefPgIdxesTmp = ( ref( $hrefPageTmp->{ "indexes" } ) eq 'HASH' ) ? $hrefPageTmp->{ "indexes" } : {};

            #---- Process and fill in page content
            my $bPageUpdatedTmp = $FALSE;
            my @arrXMLPageLinesTmp = ();
            my $sPrevLineTmp = undef;    # previous template line (after filling)
            my $sIdxNameTmp = undef;     # index name of the <Data> element being processed
            foreach my $sTplLineTmp ( @arrPageTplLinesTmp ){
                # Work on a copy, the template lines are reused for every page
                my $sLineTmp = $sTplLineTmp;

                #---- Find and replace the page eyecatchers
                if ( $sLineTmp =~ s/\#PAGENUMBER\#/$sPageNoTmp/g ){ $bPageUpdatedTmp = $TRUE; }

                #---- Remember page start, job specific indexes are added right below this line
                my $bPageStartTmp = ( $sLineTmp =~ /\<PageIndex/ ) ? $TRUE : $FALSE;

                #---- Process index
                # The index name is expected on the line following the <Data> line
                if ( defined( $sPrevLineTmp ) && $sPrevLineTmp =~ /\<Data\>/ ){
                    #---- Fetch index name
                    if ( $sLineTmp =~ /\<Name\>(.*)\<\/Name\>/ ){ $sIdxNameTmp = $1; }
                }
                if (    defined( $sIdxNameTmp )
                     && $sIdxNameTmp ne ""
                     && $sLineTmp =~ /\<Value\>(.*)\<\/Value\>/
                   ){
                    #---- Evaluate and fill in index value as needed
                    # The template value is a regular expression: the part of the
                    # index value matched by it is replaced with its first capture group
                    my $sIdxValuePatternTmp = $1;
                    my $reValueTmp = eval { qr/$sIdxValuePatternTmp/ };
                    if ( !defined( $reValueTmp ) ){
                        print STDERR ( "Invalid value pattern:>" . $sIdxValuePatternTmp . "< for index:>" . $sIdxNameTmp . "< reason=" . $@ . "\n" );
                        return undef;
                    }

                    my $sIdxValTmp = $hrefPgIdxesTmp->{ $sIdxNameTmp };
                    if ( !defined( $sIdxValTmp ) ){ $sIdxValTmp = ""; }
                    if ( $bLog == $TRUE ){ print STDERR ( "name:>$sIdxNameTmp< pattern:>$reValueTmp< value:>$sIdxValTmp<\n" ); }
                    $sIdxValTmp =~ s/$reValueTmp/$1/;

                    $sLineTmp =~ s/(\<Value\>.*\<\/Value\>)/\<Value\>$sIdxValTmp\<\/Value\>/;
                    $bPageUpdatedTmp = $TRUE;
                    if ( $bLog == $TRUE ){ print STDERR ( "$sLineTmp\n" ); }
                }
                if ( $sLineTmp =~ /\<\/Data\>/ ){
                    $sIdxNameTmp = undef;
                }

                push( @arrXMLPageLinesTmp, $sLineTmp );

                #---- Process page start
                if (    $bPageStartTmp == $TRUE
                     && ref( $hrefDocInfoTmp ) eq 'HASH'
                   ){
                    my $sDocTypeTmp = $hrefDocInfoTmp->{ "doctype" };
                    my $sDocClassTmp = $hrefDocInfoTmp->{ "docclass" };

                    # Add document type index
                    if ( defined( $sDocTypeTmp ) && $sDocTypeTmp ne "" ){
                        push(   @arrXMLPageLinesTmp
                              , "    <Data>"
                              , "      <Name>DocumentType</Name>"
                              , "      <Value>$sDocTypeTmp</Value>"
                              , "    </Data>"
                            );
                    }

                    # Add document class index
                    if ( defined( $sDocClassTmp ) && $sDocClassTmp ne "" ){
                        push(   @arrXMLPageLinesTmp
                              , "    <Data>"
                              , "      <Name>DocumentClass</Name>"
                              , "      <Value>$sDocClassTmp</Value>"
                              , "    </Data>"
                            );
                    }
                }

                $sPrevLineTmp = $sLineTmp;
            } # foreach my $sTplLineTmp ( @arrPageTplLinesTmp )

            #---- Page content is added only when an eyecatcher or index value was filled in
            if ( $bPageUpdatedTmp == $TRUE ){
                push( @{ $hrefFilledDocTmp->{ "content" } }, @arrXMLPageLinesTmp );
            }
        } # foreach my $hrefPageTmp ( @{ $arefPagesTmp } )
    } # if ( ref( $arefPagesTmp ) eq 'ARRAY' )

    #---- Evaluate and fill in post document content ----#
    if ( ref( $hrefSplitTemplatePar->{ "postdoc" } ) eq 'ARRAY' ){
        push( @{ $hrefFilledDocTmp->{ "content" } }, @{ $hrefSplitTemplatePar->{ "postdoc" } } );
    }

    return $hrefFilledDocTmp;
}

#-----------------------------------------------------------------------
# Generate XML
#
# - From the filled in XML content, generates a XML file
#
#-----------------------------------------------------------------------
sub _generateXML(){

    #---- Get parameter
    #
    # 1. XML filename (of type string)
    # 2. XML content (of type hash reference)
    #    {
    #        "content": [<Array of XML lines>]
    #    }
    #
    # Returns
    # - 0 on success (each line is written followed by a "\n")
    # - ( -105, <error message> ) when the file can not be opened
    # - ( -106, <error message> ) when writing or closing the file failed
    #
    my $sXMLFilenamePar = shift;
    my $hrefXMLDocPar = shift;

    # print (not printf): the filename must not be treated as format directive
    if ( $bLog == $TRUE ){ print STDERR ( "_generateXML($sXMLFilenamePar)\n" ); }

    #---- Write XML content
    # Three argument open on a lexical handle: the filename is never
    # interpreted as open mode and the handle can not clash with other code
    my $fhXMLTmp;
    my $iRcTmp = open( $fhXMLTmp, '>', $sXMLFilenamePar ) ? 1 : 0;
    if ( $iRcTmp == 0 ){
        return (-105, "Unable to open XML file $sXMLFilenamePar. rc=$iRcTmp reason=" . $!);
    }

    my @arrLinesTmp = ();
    if (    ref( $hrefXMLDocPar ) eq 'HASH'
         && ref( $hrefXMLDocPar->{ "content" } ) eq 'ARRAY'
       ){
        @arrLinesTmp = @{ $hrefXMLDocPar->{ "content" } };
    }

    # syswrite is kept on purpose: it writes the "\n" line ends unchanged on
    # every platform. Its result is checked so that a failed or short write
    # is reported instead of leaving a truncated XML file unnoticed.
    foreach my $sLineTmp ( @arrLinesTmp ){
        my $sXMLEntryTmp = $sLineTmp . "\n";
        my $iWrittenTmp = syswrite( $fhXMLTmp, $sXMLEntryTmp, length( $sXMLEntryTmp ) );
        if ( !defined( $iWrittenTmp ) || $iWrittenTmp != length( $sXMLEntryTmp ) ){
            my $sReasonTmp = $!;
            close( $fhXMLTmp );
            return (-106, "Unable to write XML file $sXMLFilenamePar. reason=" . $sReasonTmp);
        }
    }

    if ( !close( $fhXMLTmp ) ){
        return (-106, "Unable to write XML file $sXMLFilenamePar. reason=" . $!);
    }

    return 0;
}

__END__
