diff --git a/fetch.py b/fetch.py
index 3675636..d34a979 100755
--- a/fetch.py
+++ b/fetch.py
@@ -7,12 +7,14 @@ Created on Mon Nov 5 20:55:36 2018
"""
import requests
+import re
def runfetch(url):
session=requests.Session()
- req1=session.get(url)
+ req1=session.get(url) # Get the source code
text=req1.text.split('\n')
+ # Parse the HTML source code
text=['\n'.join(row.split('\n')[:row.split('\n').index(""):]) for row in '\n'.join(text[text.index('
ANNEXES
ANNEXE I
'):text.index('Annexe
'):]).split('')[2::]]
diclist=[]
for row in text:
@@ -23,8 +25,11 @@ def runfetch(url):
if __name__ == '__main__':
- diclist=runfetch('https://www.legifrance.gouv.fr/eli/arrete/2018/10/12/PRMD1824595A/jo/texte')
+ diclist=runfetch('https://www.legifrance.gouv.fr/eli/arrete/2018/10/12/PRMD1824595A/jo/texte') # Fetch all the data from a French article
url='https://www.legifrance.gouv.fr/eli/arrete/2018/10/12/PRMD1824595A/jo/texte'
+ for index, zone in enumerate(diclist):
+ for element in zone:
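diclist[index][element] = re.sub("", "\n", diclist[index][element]) # Replace the HTML line-break tags by \n -- NOTE(review): the tag names were stripped from this copy (presumably <br> variants); restore the pattern from the original before use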
# Print the dict
print("===== Dict =====")
for index, zone in enumerate(diclist):