Jump to content
vasilecaraus

Python: De ce ce nu pot sa traduc acest html tag "meta..description" ?

Recommended Posts

Posted

Am acest cod python care funcționează foarte bine.
Asadar, pot să traduc toate tagurile HTML, cu excepția unuia: `<meta name="description" content="...">`
Ma poate ajuta cineva cu o solutie? Nu știu de ce nu funcționează acest tag sa-l traduc.
  

from bs4 import BeautifulSoup
from bs4.formatter import HTMLFormatter
from googletrans import Translator
import requests


translator = Translator()


class UnsortedAttributes(HTMLFormatter):
def attributes(self, tag):
     for k, v in tag.attrs.items():
         yield k, v


files_from_folder = r"c:\carte\1"


use_translate_folder = True


destination_language = 'fr'


extension_file = ".html"


import os
directory = os.fsencode(files_from_folder)


def recursively_translate(node):
for x in range(len(node.contents)):
     if isinstance(node.contents[x], str):
         if node.contents[x].strip() != '':
             try:
                 node.contents[x].replaceWith(translator.translate(node.contents[x], dest=destination_language).text)
             except:
                 pass
     elif node.contents[x] != None:
         recursively_translate(node.contents[x])
amount = 1
for file in os.listdir(directory):
filename = os.fsdecode(file)
print(filename)
if filename == 'y_key_e479323ce281e459.html' or filename == 'directory.html':
     continue
if filename.endswith(extension_file):
     with open(os.path.join(files_from_folder, filename), encoding='utf-8') as html:
         soup = BeautifulSoup('<pre>' + html.read() + '</pre>', 'html.parser')
         for title in soup.findAll('title'):
             recursively_translate(title)
         for meta in soup.findAll('meta', {'name':'description'}):
             try:
                 meta['content'] = recursively_translate(meta['content'])
             except:
                 pass

         for p in soup.findAll('p', class_='text_obisnuit2'):
                 recursively_translate(p)

     print(f'{filename} translated ({amount})')
     amount += 1
     soup = soup.encode(formatter=UnsortedAttributes()).decode('utf-8')
     new_filename = f'{filename.split(".")[0]}_{destination_language}.html'
     if use_translate_folder:
         try:
             with open(os.path.join(files_from_folder+r'\translated', new_filename), 'w', encoding='utf-8') as new_html:
                 new_html.write(soup[5:-6])
         except:
             os.mkdir(files_from_folder+r'\translated')
             with open(os.path.join(files_from_folder+r'\translated', new_filename), 'w', encoding='utf-8') as new_html:
                 new_html.write(soup[5:-6])
     else:
         with open(os.path.join(files_from_folder, new_filename), 'w', encoding='utf-8') as html:
             html.write(soup[5:-6])


test.html

<html>
<head>

<title>It really helps me do great things for her</title>
    
<meta name="description" content="What I LOVE to do and what I don't love">

<p class="text_obisnuit2"><em>Buckingham has a new book called Love</em></p>

</body>
</html>

 

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Paste as plain text instead

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.



×
×
  • Create New...