mirror of
https://github.com/Ladebeze66/llm_ticket3.git
synced 2025-12-13 15:46:52 +01:00
1504-16:07testrecupok
This commit is contained in:
parent
1c36d1bba6
commit
4cd697e11a
File diff suppressed because it is too large
142
extract_single_message.py
Normal file
@@ -0,0 +1,142 @@
# extract_single_message.py

import json
import os
import sys
import argparse
from bs4 import BeautifulSoup, Tag


def parse_args():
    """Parse the command-line arguments."""
    parser = argparse.ArgumentParser(description="Extracts a specific message and its images from a JSON file.")

    parser.add_argument("--input", "-i",
                        help="Path to the JSON file containing the messages (default: messages_raw.json)",
                        default="messages_raw.json")

    parser.add_argument("--message-id", "-m",
                        help="ID of the message to extract (default: 228942)",
                        type=int, default=228942)

    parser.add_argument("--output", "-o",
                        help="Output directory (default: extracted_message)",
                        default="extracted_message")

    parser.add_argument("--create-test", "-t",
                        help="Create a test file if the input file does not exist",
                        action="store_true")

    return parser.parse_args()


def create_test_data(input_file, message_id):
    """Create a test file containing one message with an image."""
    print(f"⚠️ The file {input_file} does not exist. Creating a test example...")

    test_data = {
        "messages": [
            {
                "id": message_id,
                "body": "<p>Here is a test image:</p><img src='https://example.com/image.jpg' alt='Test'>",
                "author_id": [1, "Test User"],
                "date": "2023-01-01 12:00:00",
                "subject": "Test message"
            }
        ]
    }

    try:
        # Create the parent directory if needed
        parent_dir = os.path.dirname(input_file)
        if parent_dir and not os.path.exists(parent_dir):
            os.makedirs(parent_dir, exist_ok=True)

        with open(input_file, "w", encoding="utf-8") as f:
            json.dump(test_data, f, indent=2, ensure_ascii=False)

        print(f"✅ Test file created: {input_file}")
        return True
    except Exception as e:
        print(f"❌ Error while creating the test file: {e}")
        return False


def extract_message(input_file, message_id, output_dir, create_test=False):
    """Extract a specific message and its images from a JSON file."""
    # Check that the input file exists
    if not os.path.exists(input_file):
        if create_test:
            created = create_test_data(input_file, message_id)
            if not created:
                print(f"❌ Unable to create the test file {input_file}")
                return False
        else:
            print(f"❌ The file {input_file} does not exist. Use --create-test to create an example.")
            return False

    try:
        # Create the output directory
        os.makedirs(output_dir, exist_ok=True)

        # Load the data
        with open(input_file, "r", encoding="utf-8") as f:
            data = json.load(f)

        messages = data.get("messages", [])
        found = next((m for m in messages if m["id"] == message_id), None)

        if not found:
            print(f"❌ Message ID {message_id} not found.")
            return False

        print(f"✅ Message ID {message_id} found.")

        # 1. Save the raw HTML
        body_html = found.get("body", "")
        html_path = os.path.join(output_dir, f"message_{message_id}.html")
        with open(html_path, "w", encoding="utf-8") as f:
            f.write(body_html)
        print(f"📄 HTML saved to: {html_path}")

        # 2. Extract the images from the HTML
        soup = BeautifulSoup(body_html, "html.parser")
        image_urls = []

        # Safe extraction of the image URLs
        for img in soup.find_all("img"):
            if isinstance(img, Tag) and img.has_attr("src"):
                image_urls.append(img["src"])

        image_data = {
            "message_id": message_id,
            "author": found.get("author_id", [None, None])[1],
            "date": found.get("date"),
            "subject": found.get("subject"),
            "image_urls": image_urls
        }

        # 3. Save as JSON
        json_path = os.path.join(output_dir, f"message_{message_id}_meta.json")
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(image_data, f, indent=2, ensure_ascii=False)
        print(f"🧾 Metadata saved to: {json_path}")

        print("\n✅ Extraction finished.")
        print("🖼️ Images detected:")
        for url in image_urls:
            print(f" - {url}")

        return True
    except Exception as e:
        print(f"❌ Error during extraction: {e}")
        return False


if __name__ == "__main__":
    args = parse_args()
    success = extract_message(
        input_file=args.input,
        message_id=args.message_id,
        output_dir=args.output,
        create_test=args.create_test
    )

    if not success:
        sys.exit(1)
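For reference, a minimal sketch of driving the extractor from Python rather than the command line; the values below simply mirror the script's own defaults.

    from extract_single_message import extract_message

    # create_test=True falls back to generating a sample messages_raw.json
    ok = extract_message(
        input_file="messages_raw.json",
        message_id=228942,
        output_dir="extracted_message",
        create_test=True,
    )
    if not ok:
        print("extraction failed")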
110
extract_single_message_filter.py
Normal file
@@ -0,0 +1,110 @@
# extract_single_message_filter.py

import json
import os
import argparse
import requests
from bs4 import BeautifulSoup, Tag


def parse_args():
    parser = argparse.ArgumentParser(description="Extracts a filtered, cleaned message together with its useful image.")
    parser.add_argument("--input", "-i", default="messages_raw.json", help="JSON file with the messages")
    parser.add_argument("--message-id", "-m", type=int, default=228942, help="ID of the message to process")
    parser.add_argument("--output", "-o", default="extracted_message", help="Output directory")
    return parser.parse_args()


def clean_message_html(html, keep_img=True):
    soup = BeautifulSoup(html, "html.parser")

    # Remove useless tags (tooltips, tracking, etc.)
    for tag in soup.select(".o_mail_info, .recipients_info, .o_mail_tracking, .o_thread_icons, .o_thread_tooltip_container, .o_thread_full_spand_icon"):
        tag.decompose()

    # Remove useless images (font_to_img icons; keep /web/image/ attachments)
    for img in soup.find_all("img"):
        if not isinstance(img, Tag):
            continue

        if not img.has_attr("src"):
            continue

        src = img["src"]
        if not isinstance(src, str):
            continue

        if "font_to_img" in src:
            img.decompose()
        elif keep_img and not src.startswith("http") and "/web/image/" not in src:
            img.decompose()

    return str(soup)


def download_image(url, output_dir, filename="image.png"):
    try:
        r = requests.get(url, stream=True, timeout=10)
        if r.status_code == 200:
            path = os.path.join(output_dir, filename)
            with open(path, "wb") as f:
                for chunk in r.iter_content(1024):
                    f.write(chunk)
            return path
        else:
            print(f"⚠️ Failed to download the image ({r.status_code}): {url}")
    except Exception as e:
        print(f"⚠️ Error while downloading the image: {e}")
    return None


def extract_and_clean(input_file, message_id, output_dir):
    if not os.path.exists(input_file):
        print(f"❌ File not found: {input_file}")
        return

    os.makedirs(output_dir, exist_ok=True)

    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    messages = data.get("messages", [])
    msg = next((m for m in messages if m["id"] == message_id), None)

    if not msg:
        print(f"❌ Message ID {message_id} not found.")
        return

    print(f"✅ Message found: {msg.get('subject')}")

    # Clean the HTML
    original_html = msg.get("body", "")
    clean_html = clean_message_html(original_html)

    # Extract the useful images
    soup = BeautifulSoup(clean_html, "html.parser")
    image_urls = []

    for img in soup.find_all("img"):
        if isinstance(img, Tag) and img.has_attr("src"):
            src = img["src"]
            if isinstance(src, str) and (src.startswith("http") or "/web/image/" in src):
                image_urls.append(src)

    # Download the first image
    downloaded = None
    for url in image_urls:
        if isinstance(url, str) and url.startswith("http"):
            downloaded = download_image(url, output_dir, "image.png")
            break

    # Save the cleaned HTML
    html_path = os.path.join(output_dir, f"message_{message_id}_clean.html")
    with open(html_path, "w", encoding="utf-8") as f:
        f.write(clean_html)

    print(f"📄 Cleaned HTML saved: {html_path}")
    if downloaded:
        print(f"🖼️ Image downloaded to: {downloaded}")
    else:
        print("❗ No direct HTTP image was downloaded (internal Odoo images only?)")


if __name__ == "__main__":
    args = parse_args()
    extract_and_clean(args.input, args.message_id, args.output)
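A quick sanity check of clean_message_html on a hand-written snippet (the sample URLs are illustrative, not taken from the repository): the font_to_img icon is removed while the /web/image/ attachment survives.

    from extract_single_message_filter import clean_message_html

    sample = ("<div><img src='/web/content/font_to_img/128'/>"
              "<img src='/web/image/145435'/></div>")
    # expected output keeps only the /web/image/145435 <img>
    print(clean_message_html(sample))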
68
extract_text_from_html.py
Normal file
@@ -0,0 +1,68 @@
# extract_text_from_html.py

import os
import argparse
from bs4 import BeautifulSoup, Tag


def parse_args():
    parser = argparse.ArgumentParser(description="Extracts the plain text from an HTML file.")
    parser.add_argument("--input", "-i", default="extracted_message/message_228942_clean.html",
                        help="HTML file to process")
    parser.add_argument("--output", "-o", default="extracted_message/message_text.txt",
                        help="Output text file")
    parser.add_argument("--preserve-images", "-p", action="store_true",
                        help="Preserve the references to images")
    return parser.parse_args()


def extract_text(html_file, output_file, preserve_images=False):
    """Extract the text from an HTML file and save it to a text file."""
    if not os.path.exists(html_file):
        print(f"❌ File not found: {html_file}")
        return False

    # Create the output directory if needed
    output_dir = os.path.dirname(output_file)
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir, exist_ok=True)

    # Load and parse the HTML
    with open(html_file, "r", encoding="utf-8") as f:
        html_content = f.read()

    soup = BeautifulSoup(html_content, "html.parser")

    # Remove empty, useless tags (but never <img>, which has no text,
    # and keep any tag that still contains an image so that
    # preserve_images can find them later)
    for tag in soup.find_all():
        if isinstance(tag, Tag) and tag.name != "img":
            if not tag.get_text(strip=True) and not tag.find("img"):
                tag.decompose()

    # Extract the text with better formatting
    paragraphs = []

    # Process the paragraphs
    for p in soup.find_all("p"):
        if isinstance(p, Tag):
            text = p.get_text(strip=True)
            if text:  # Skip empty paragraphs
                paragraphs.append(text)

    # Process the images if requested
    if preserve_images:
        for img in soup.find_all("img"):
            if isinstance(img, Tag) and img.has_attr("src"):
                paragraphs.append(f"[IMAGE: {img['src']}]")

    # Build the final text with a clear structure
    text_content = "\n\n".join(paragraphs)

    # Save the text
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(text_content)

    print(f"✅ Text extracted and saved to: {output_file}")
    return True


if __name__ == "__main__":
    args = parse_args()
    extract_text(args.input, args.output, args.preserve_images)
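The same call the __main__ block performs, shown programmatically; with preserve_images=True each remaining <img> becomes an [IMAGE: ...] placeholder in the output text.

    from extract_text_from_html import extract_text

    extract_text(
        "extracted_message/message_228942_clean.html",
        "extracted_message/message_text.txt",
        preserve_images=True,
    )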
52
extract_tracking_value_by_id.py
Normal file
@@ -0,0 +1,52 @@
# extract_tracking_value_by_id.py

import json
import os
from odoo.auth_manager import AuthManager


CONFIG_FILE = "config.json"


def load_config(path=CONFIG_FILE):
    with open(path, "r") as f:
        return json.load(f)


def fetch_tracking_values(ids, config_file=CONFIG_FILE):
    config = load_config(config_file)
    odoo_cfg = config["odoo"]

    auth = AuthManager(
        url=odoo_cfg["url"],
        db=odoo_cfg["db"],
        username=odoo_cfg["username"],
        api_key=odoo_cfg["api_key"]
    )
    if not auth.login():
        print("❌ Authentication failed.")
        return

    result = auth.read(
        model="mail.tracking.value",
        ids=ids,
        fields=["field", "field_desc", "old_value_char", "new_value_char"]
    )

    # Save as JSON
    out_path = "tracking_value_result.json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2, ensure_ascii=False)

    print(f"✅ Result saved to {out_path}")
    for entry in result:
        print(f"- Field: {entry['field_desc']}")
        print(f"  Old value: {entry['old_value_char']}")
        print(f"  New value: {entry['new_value_char']}")
        print()


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description="Read tracking values (mail.tracking.value)")
    parser.add_argument("ids", nargs="+", type=int, help="IDs to query (e.g. 179095)")
    parser.add_argument("--config", "-c", default=CONFIG_FILE, help="Path to the config file")
    args = parser.parse_args()

    fetch_tracking_values(args.ids, args.config)
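A sketch of the equivalent programmatic call, assuming a config.json with the "odoo" section (url, db, username, api_key) that AuthManager expects.

    from extract_tracking_value_by_id import fetch_tracking_values

    # Reads mail.tracking.value record 179095 and writes tracking_value_result.json
    fetch_tracking_values([179095])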
BIN
extracted_message/image.png
Normal file
Binary file not shown.
Size: 25 KiB
14
extracted_message/message_228942_clean.html
Normal file
@@ -0,0 +1,14 @@


<div class="o_thread_message_content" style='font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'>
<p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'>
</p>
<div style='font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'>
<p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'><span style='font-size:11.0pt; font-family:"Calibri",sans-serif'>Bonjour,</span></p><p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'></p><p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'></p>
<p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'><span style='font-size:11.0pt; font-family:"Calibri",sans-serif'></span></p><p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'> </p><p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'></p>
<p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'><span style='font-size:11.0pt; font-family:"Calibri",sans-serif'>Je ne parviens pas à accéder au l’essai au bleu :</span></p><p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'></p><p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'></p>
<p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'><img height="337" id="Image_x0020_1" src="https://odoo.cbao.fr/web/image/145435?access_token=608ac9e7-3627-4a13-a8ec-06ff5046ebf3" style="border-style:none; vertical-align:middle; width:6.5in; height:3.5083in" width="624"/><span style='font-size:11.0pt; font-family:"Calibri",sans-serif'></span></p><p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'></p><p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'></p>
<p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'><span style='font-size:11.0pt; font-family:"Calibri",sans-serif'></span></p><p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'> </p><p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'></p>
<p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'><span style='font-size:11.0pt; font-family:"Calibri",sans-serif'>Merci par avance pour votre.</span></p><p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'></p><p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'></p>
<p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'><span style='font-size:11.0pt; font-family:"Calibri",sans-serif'></span></p><p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'> </p><p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'></p>
<p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'><span style='font-size:11.0pt; font-family:"Calibri",sans-serif'>Cordialement</span></p></div></div><p style='margin:0px 0 1rem 0; font-size:13px; font-family:"Lucida Grande", Helvetica, Verdana, Arial, sans-serif'><br/></p>
7
extracted_message/message_text.txt
Normal file
@@ -0,0 +1,7 @@
Bonjour,

Je ne parviens pas à accéder au l’essai au bleu :

Merci par avance pour votre.

Cordialement
273
messages_raw.json
Normal file
File diff suppressed because one or more lines are too long
9
tracking_value_result.json
Normal file
@@ -0,0 +1,9 @@
[
  {
    "id": 179095,
    "field": "user_id",
    "field_desc": "Assigné à",
    "old_value_char": "",
    "new_value_char": "Fabien LAFAY"
  }
]