I'm trying to replace href url with a result value using regex, I also tried Beautifulsoup module but no success.keep getting one and the same one url in all html files
class RandomChoiceNoImmediateRepeat(object):
def __init__(self, lst):
self.lst = lst
self.last = None
def choice(self):
if self.last is None:
self.last = random.choice(self.lst)
return self.last
else:
nxt = random.choice(self.lst)
# make a new choice as long as it's equal to the last.
while nxt == self.last:
nxt = random.choice(self.lst)
# Replace the last and return the choice
self.last = nxt
return nxt
for filename in glob.glob('/docs/*.txt'):
file_metadata = { 'name': 'file.txt', 'mimeType': '*/*' }
media = MediaFileUpload(filename, mimetype='*/*', resumable=True)
file = drive_service.files().create(body=file_metadata, media_body=media, fields='id').execute()
link = 'https://drive.google.com/uc?export=download&id=' + file.get('id')
linkd = []
linkd.append(link)
for filename in glob.glob('/docs/htmlz/*.html'):
with open(filename, "r") as html_file:
soup = BeautifulSoup(html_file,'html.parser')
for anchor in soup.findAll("a", attrs={ "class" : "downloadme" }):
gen = RandomChoiceNoImmediateRepeat(linkd)
i = gen.choice()
anchor['href'] = str(i)
with open(filename, "w") as html_file:
html_file.write(str(soup))
html_file.close()
Aucun commentaire:
Enregistrer un commentaire