diff --git a/fetch.py b/fetch.py index 3675636..d34a979 100755 --- a/fetch.py +++ b/fetch.py @@ -7,12 +7,14 @@ Created on Mon Nov 5 20:55:36 2018 """ import requests +import re def runfetch(url): session=requests.Session() - req1=session.get(url) + req1=session.get(url) # Get the source code text=req1.text.split('\n') + # Parse the HTML source code text=['\n'.join(row.split('\n')[:row.split('\n').index(""):]) for row in '\n'.join(text[text.index('
ANNEXES
ANNEXE I

'):text.index('
Annexe
'):]).split('')[2::]] diclist=[] for row in text: @@ -23,8 +25,11 @@ def runfetch(url): if __name__ == '__main__': - diclist=runfetch('https://www.legifrance.gouv.fr/eli/arrete/2018/10/12/PRMD1824595A/jo/texte') + diclist=runfetch('https://www.legifrance.gouv.fr/eli/arrete/2018/10/12/PRMD1824595A/jo/texte') # Fetch all the data from a french article url='https://www.legifrance.gouv.fr/eli/arrete/2018/10/12/PRMD1824595A/jo/texte' + for index, zone in enumerate(diclist): + for element in zone: + diclist[index][element] = re.sub("", "\n", diclist[index][element]) # Replace
,
and
by \n # Print the dict print("===== Dict =====") for index, zone in enumerate(diclist):