# infodata > inpython > intools
# module in_docx
# Author: [Th] 23/02/2024
from docx import Document
from docxcompose.composer import Composer
from docxtpl import DocxTemplate
import datetime as dt
import inpython.intools.in_log as inlog
import os


def chk_br(par):
    """Tell whether a paragraph element contains at least one break.

    par: paragraph element exposing ``r_lst`` (its runs); every run must
         expose ``br_lst`` (its break children).

    Returns True as soon as one run has a non-empty ``br_lst``, False
    otherwise (including when the paragraph has no run at all).

    NOTE(review): ``br_lst`` presumably lists every break of the run, so a
    plain line break would count as well as a page break - confirm.
    """
    return any(len(run.br_lst) > 0 for run in par.r_lst)


def doc_replace_var(input_filename, output_filename, context_str):
    """Render a docx template with variables supplied as a JSON string.

    input_filename : path of the template, opened with DocxTemplate.
    output_filename: path where the rendered document is saved.
    context_str    : JSON text decoded into the rendering context.

    Always returns an empty string.
    """
    from json import loads

    # Decode the context first so malformed JSON fails before the template is opened.
    variables = loads(context_str)

    template = DocxTemplate(input_filename)
    template.render(variables)
    template.save(output_filename)
    return ""


def doc_extract(doc_directory, extract_name, input_filename, skipped_pg_br, keeped_pg_br, final_br=True):
    """Extract part of a Word (.docx) document into a new, timestamped file.

    The source document is copied, then the top-level elements of the copy
    are pruned in three phases driven by the paragraphs for which chk_br()
    is true ("break paragraphs"):

    1. skipping: paragraphs are removed until ``skipped_pg_br`` break
       paragraphs have been seen; the paragraph holding the last skipped
       break is removed as well.
    2. keeping : elements are kept until ``keeped_pg_br`` break paragraphs
       have been seen; that closing paragraph is kept unless ``final_br``
       is false.
    3. trimming: every remaining element except the body-level ``sectPr``
       is removed.

    doc_directory : directory receiving the result. It is concatenated
                    as-is with the file name, so it must end with a path
                    separator.
    extract_name  : suffix of the result file name; "extract" when empty.
    input_filename: full path of the document to extract from.
    skipped_pg_br : number of breaks to skip before the extraction starts
                    (int, or a string holding an int). With 0 the
                    extraction starts at the first element.
    keeped_pg_br  : number of breaks covered by the extraction (int, or a
                    string holding an int).
    final_br      : when false, the paragraph holding the closing break is
                    removed from the result.

    Returns the full path of the result file, named
    ``yyyymmdd-hhmmss-<extract_name>.docx``.
    """
    inlog.init_log("in_docx", f"Extraction des pages entre les break {skipped_pg_br} et {keeped_pg_br}")

    word_ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
    paragraph_tag = word_ns + "p"
    section_tag = word_ns + "sectPr"

    if not extract_name:
        extract_name = "extract"

    tmp_file = f"{doc_directory}{dt.datetime.now().strftime('%Y%m%d-%H%M%S-')}{extract_name}.docx"

    # Work on a copy of the source document: save it under the result name
    # and reopen that copy for pruning.
    Document(input_filename).save(tmp_file)
    doc_tmp = Document(tmp_file)
    body = doc_tmp.element.body

    # Counts may arrive as strings.
    if isinstance(skipped_pg_br, str):
        skipped_pg_br = int(skipped_pg_br)
    if isinstance(keeped_pg_br, str):
        keeped_pg_br = int(keeped_pg_br)

    skipping, keeping, trimming = 0, 1, 2

    # Nothing to skip: start directly in the keeping phase. Entering the
    # skipping phase with a target of 0 would drop the first paragraph (or
    # every paragraph when the first one already holds a break).
    state = keeping if skipped_pg_br == 0 else skipping
    num_page_breaks = 0

    # Iterate over a snapshot: elements are removed from the body inside the loop.
    for elt in list(body):
        if state == skipping:
            # NOTE(review): only paragraphs are removed while skipping, so a
            # table located in the skipped pages stays in the result -
            # confirm whether that is intended.
            if elt.tag == paragraph_tag:
                if chk_br(elt):
                    num_page_breaks += 1
                if num_page_breaks == skipped_pg_br:
                    # Last skipped break reached: restart the count for the kept part.
                    num_page_breaks = 0
                    state = keeping
                body.remove(elt)
        elif state == keeping:
            if elt.tag == paragraph_tag:
                if chk_br(elt):
                    num_page_breaks += 1
                # Checked on every paragraph, so keeped_pg_br == 0 closes the
                # extraction on the first paragraph met in this phase.
                if num_page_breaks == keeped_pg_br:
                    state = trimming
                    if not final_br:
                        body.remove(elt)
        elif elt.tag != section_tag:
            # Trimming phase: drop everything but the section properties.
            body.remove(elt)

    doc_tmp.save(tmp_file)

    return tmp_file


def doc_br_count(input_filename):
    """Count the top-level paragraphs of a docx file that contain a break.

    input_filename: path of the document, opened with Document.

    Only direct children of the document body are inspected; a paragraph
    counts once when chk_br() is true for it, however many breaks it holds.

    Returns the number of such paragraphs (0 when there is none).
    """
    paragraph_tag = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}p"
    body = Document(input_filename).element.body
    return sum(1 for child in body if child.tag == paragraph_tag and chk_br(child))


def doc_str_find(input_filename, subString, occNumber):
    """Return how many breaks precede the n-th occurrence of a string in a docx file.

    The top-level elements of the document body are scanned in order:

    * a paragraph counts as one occurrence when ``subString`` is in its
      text; a break held by that same paragraph is counted before the text
      is checked, so it is included in the result;
    * a table counts one occurrence per run (row > cell > paragraph > run)
      whose text contains ``subString``; breaks inside tables are not
      counted.

    input_filename: path of the document, opened with Document.
    subString     : text to look for.
    occNumber     : 1-based rank of the occurrence wanted (int, or a string
                    holding an int).

    Returns the number of top-level paragraphs for which chk_br() is true
    seen up to the matching element, or 0 when the requested occurrence
    does not exist. A result of 0 therefore cannot be told apart from a
    match located before the first break.
    """
    inlog.init_log("in_docx", f"Recherche de {subString}")

    input_doc = Document(input_filename)
    occNumber = int(occNumber)

    word_ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
    paragraph_tag = word_ns + "p"
    table_tag = word_ns + "tbl"

    num_page_breaks = 0
    occurence = 0

    for elt in input_doc.element.body:
        if elt.tag == paragraph_tag:
            if chk_br(elt):
                num_page_breaks += 1
            hits = 1 if subString in elt.text else 0
        elif elt.tag == table_tag:
            hits = sum(
                1
                for row in elt.tr_lst
                for cell in row.tc_lst
                for par in cell.p_lst
                for run in par.r_lst
                if subString in run.text
            )
        else:
            continue

        # The requested occurrence lies inside this element: stop here.
        # Written as a range check so that occNumber <= 0 never matches.
        if occurence < occNumber <= occurence + hits:
            inlog.debug(
                f"Occurenc {occNumber} of \"{subString}\" found after {num_page_breaks} page breaks!")
            return num_page_breaks
        occurence += hits

    # Requested occurrence not found.
    return 0


def compose_doc(base_file, files_list, final_doc):
    """Append several docx files to a base document and save the result.

    base_file : path of the document used as the master of the composition.
    files_list: documents to append, in order; either a single string of
                paths separated by ";" or an iterable of paths. Empty
                entries (empty string, trailing ";") are ignored.
    final_doc : path where the assembled document is saved.

    Every path goes through os.path.normpath before use. Returns None.
    """
    inlog.init_log("composer", "Assemblage des documents", True)
    inlog.debug(os.path.normpath(base_file))
    inlog.debug(files_list)
    inlog.debug(final_doc)

    # Accept the historical ";"-separated string as well as a ready-made list.
    if isinstance(files_list, str):
        files_list = files_list.split(";")
    # An empty entry would normalise to "." and fail to open as a document.
    filenames = [name for name in files_list if name]

    master = Document(os.path.normpath(base_file))
    composer = Composer(master)
    for filename in filenames:
        composer.append(Document(os.path.normpath(filename)))
    composer.save(os.path.normpath(final_doc))


if __name__ == '__main__':
    # Manual run: assemble a quote from the base form and three documents.
    flist = ['//winprod/commun/is/chantier/1000/schemas.docx', '//winprod/commun/is/chantier/1000/1000_DETAIL.docx',
             '//winprod/commun/is/chantier/_doc_base/Formulaire_ALU_cgv.docx']
    # compose_doc() splits its second argument on ";", so hand it the paths
    # joined into one string rather than the list itself.
    compose_doc('//winprod/commun/is/chantier/_doc_base/Formulaire_ALU_1-3.docx', ';'.join(flist),
                '//winprod/commun/is/chantier/1000/Devis.docx')
