Am acest cod python care funcționează foarte bine.
Asadar, pot să traduc toate tagurile HTML, cu excepția unuia: `<meta name="description" content="...">`
Ma poate ajuta cineva cu o solutie? Nu știu de ce nu funcționează acest tag sa-l traduc.
from bs4 import BeautifulSoup
from bs4.formatter import HTMLFormatter
from googletrans import Translator
import requests
translator = Translator()
class UnsortedAttributes(HTMLFormatter):
def attributes(self, tag):
for k, v in tag.attrs.items():
yield k, v
files_from_folder = r"c:\carte\1"
use_translate_folder = True
destination_language = 'fr'
extension_file = ".html"
import os
directory = os.fsencode(files_from_folder)
def recursively_translate(node):
for x in range(len(node.contents)):
if isinstance(node.contents[x], str):
if node.contents[x].strip() != '':
try:
node.contents[x].replaceWith(translator.translate(node.contents[x], dest=destination_language).text)
except:
pass
elif node.contents[x] != None:
recursively_translate(node.contents[x])
amount = 1
for file in os.listdir(directory):
filename = os.fsdecode(file)
print(filename)
if filename == 'y_key_e479323ce281e459.html' or filename == 'directory.html':
continue
if filename.endswith(extension_file):
with open(os.path.join(files_from_folder, filename), encoding='utf-8') as html:
soup = BeautifulSoup('<pre>' + html.read() + '</pre>', 'html.parser')
for title in soup.findAll('title'):
recursively_translate(title)
for meta in soup.findAll('meta', {'name':'description'}):
try:
meta['content'] = recursively_translate(meta['content'])
except:
pass
for p in soup.findAll('p', class_='text_obisnuit2'):
recursively_translate(p)
print(f'{filename} translated ({amount})')
amount += 1
soup = soup.encode(formatter=UnsortedAttributes()).decode('utf-8')
new_filename = f'{filename.split(".")[0]}_{destination_language}.html'
if use_translate_folder:
try:
with open(os.path.join(files_from_folder+r'\translated', new_filename), 'w', encoding='utf-8') as new_html:
new_html.write(soup[5:-6])
except:
os.mkdir(files_from_folder+r'\translated')
with open(os.path.join(files_from_folder+r'\translated', new_filename), 'w', encoding='utf-8') as new_html:
new_html.write(soup[5:-6])
else:
with open(os.path.join(files_from_folder, new_filename), 'w', encoding='utf-8') as html:
html.write(soup[5:-6])
test.html
<html>
<head>
<title>It really helps me do great things for her</title>
<meta name="description" content="What I LOVE to do and what I don't love">
<p class="text_obisnuit2"><em>Buckingham has a new book called Love</em></p>
</body>
</html>