From b3788fe34d42582e67577c198fa968c6793dae09 Mon Sep 17 00:00:00 2001 From: TheDevKiller Date: Wed, 7 Nov 2018 23:32:46 +0100 Subject: [PATCH] Added code to replace <br>
by \n in positions --- fetch.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fetch.py b/fetch.py index 3675636..d34a979 100755 --- a/fetch.py +++ b/fetch.py @@ -7,12 +7,14 @@ Created on Mon Nov 5 20:55:36 2018 """ import requests +import re def runfetch(url): session=requests.Session() - req1=session.get(url) + req1=session.get(url) # Get the source code text=req1.text.split('\n') + # Parse the HTML source code text=['\n'.join(row.split('\n')[:row.split('\n').index(""):]) for row in '\n'.join(text[text.index('
ANNEXES
ANNEXE I

'):text.index('
Annexe
'):]).split('')[2::]] diclist=[] for row in text: @@ -23,8 +25,11 @@ def runfetch(url): if __name__ == '__main__': - diclist=runfetch('https://www.legifrance.gouv.fr/eli/arrete/2018/10/12/PRMD1824595A/jo/texte') + diclist=runfetch('https://www.legifrance.gouv.fr/eli/arrete/2018/10/12/PRMD1824595A/jo/texte') # Fetch all the data from a french article url='https://www.legifrance.gouv.fr/eli/arrete/2018/10/12/PRMD1824595A/jo/texte' + for index, zone in enumerate(diclist): + for element in zone: + diclist[index][element] = re.sub("", "\n", diclist[index][element]) # Replace
<br>, <br/>
and <br />
by \n # Print the dict print("===== Dict =====") for index, zone in enumerate(diclist):