-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathingest.py
32 lines (24 loc) · 820 Bytes
/
ingest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import os
from glob import glob
from striprtf.striprtf import rtf_to_text
import time
# Delimiter line printed to stdout together with the extracted text
separator = "---"

# Path of the signal file on the shared volume; ingestion does not start
# until this file has appeared
signal_file_path = "/shared-data/spark-ready"

# Polling interval, in seconds
polling_interval = 3

# Wait for the signal file and consume it in one step.  Attempting the
# removal directly (instead of checking for existence first and removing
# afterwards) avoids crashing with FileNotFoundError if the file vanishes
# between the check and the removal.
while True:
    try:
        os.remove(signal_file_path)
    except FileNotFoundError:
        # Signal not there yet: wait one polling interval and try again
        time.sleep(polling_interval)
    else:
        break

# Fixed 5-second pause after the signal has been consumed.
# NOTE(review): the reason for this delay is not documented here —
# presumably it gives the other side time to finish starting up; confirm.
time.sleep(5)
# Directory that holds the input files
directory_path = "/eco-inguine"

# Collect every .rtf file located directly inside that directory
rtf_files = glob(os.path.join(directory_path, "*.rtf"))

# Convert each RTF document to plain text and print it, followed by the
# separator line.
# NOTE(review): open() is called without an explicit encoding, so the
# platform's default text encoding is used when decoding the files.
for file_path in rtf_files:
    with open(file_path, "r") as file:
        raw_rtf = file.read()
    text = rtf_to_text(raw_rtf)
    print(text, separator, sep="\n")