User:TolBot/Task 1
| Status | Approved (inactive) |
| --- | --- |
| Wiki | w:en: |
| Summary | Update COVID-19 vaccination data templates |
| Page(s) | Pages: |
| Period | daily |
| Language | Python |
| Supervision | supervised |
| Exclusion compliant? | No |
- Replaced by Task 5
- Task 1A updated w:en:Template:COVID-19 vaccination data in Africa
Source
Version 1.1.1, updated 14 May 2021.
run = False
page = 'Template:COVID-19 vaccination data'
edit_summary = ''
bot_username = ''
bot_password = ''
### Variables
skipped_locations = ('Northern Ireland', 'Scotland', 'Wales', 'England', 'Europe', 'Africa', 'Oceania', 'North America', 'South America', 'Asia', 'High income', 'Upper middle income', 'Lower middle income', 'Low income')
#regex_vac_prev = '<!-- DATA BELOW THIS LINE UPDATED AUTOMATICALLY -->\n'
#regex_vac_post = '\n<!-- DATA ABOVE THIS LINE UPDATED AUTOMATICALLY -->'
regex_vac_prev = '<!-- PASTE UPDATED DATA BELOW THIS LINE -->\n'
regex_vac_post = '\n<!-- UPDATED DATA ABOVE THIS LINE -->'
regex_vac_str = regex_vac_prev + r'(.*?)' + regex_vac_post
regex_date_prev_re = r'\(as of <!--DATE OF LATEST AVAILABLE DATA \(USUALLY ONE DAY PRIOR TO THE REFRESH DATE\), DO NOT CHANGE THIS DATE UNLESS TABLE DATA HAS BEEN UPDATED-->' # Backslash parentheses
regex_date_post_re = r'\{\{efn\|' # Backslash curly brackets and pipe
regex_date_str = regex_date_prev_re + r'(.*?)' + regex_date_post_re
regex_date_prev = '(as of <!--DATE OF LATEST AVAILABLE DATA (USUALLY ONE DAY PRIOR TO THE REFRESH DATE), DO NOT CHANGE THIS DATE UNLESS TABLE DATA HAS BEEN UPDATED-->'
regex_date_post = '{{efn|'
api = 'https://en.wikipedia.org/w/api.php'
api_par_token = '?action=query&meta=tokens&type=login&format=json'
api_par_login = '?action=login&format=json'
api_par_parse = '?action=parse&prop=wikitext&format=json'
api_par_edit_token = '?action=query&meta=tokens&format=json'
api_par_edit = '?action=edit&format=json'
### Functions
# Import
import pandas as pd
from datetime import datetime as dt
import requests
import re
import json
import numpy as np
import subprocess
# Login
def login():
    session = requests.Session()
    token_obj = session.get(api + api_par_token)
    token_json = token_obj.json()
    token = token_json['query']['tokens']['logintoken']
    params = {
        'lgname': bot_username,
        'lgpassword': bot_password,
        'lgtoken': token
    }
    login_obj = session.post(api + api_par_login, data=params)
    login_json = login_obj.json()
    if login_json['login']['result'] != 'Success':
        raise ConnectionError('Login failed.')
    userid = login_json['login']['lguserid']
    username = login_json['login']['lgusername']
    return session
# Download & load CSV file
def load():
    try:
        # check=True makes a failed download raise, so the except clause can catch it
        subprocess.run(['wget', '-q', '-O', 'vaccinations.csv', 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv'], check=True)
    except:
        raise ConnectionError('Could not get vaccination data')
    return pd.read_csv('vaccinations.csv')
# Sort and remove duplicates
def clean(vac):
    vac.sort_values('date', ascending=False, inplace=True) # Sort by date
    vac.drop_duplicates('location', inplace=True) # Drop duplicate locations (so only the most recent remains)
    vac['sort'] = vac['total_vaccinations']
    vac['sort'] = np.where(vac['sort'].isnull(), vac['people_vaccinated'], vac['sort']) # Sort by people vaccinated if no total vaccinations
    vac['sort'] = np.where(vac['sort'].isnull(), vac['people_fully_vaccinated'], vac['sort']) # Sort by people fully vaccinated if no total vaccinations or people vaccinated
    vac.sort_values('sort', ascending=False, inplace=True) # Sort
    if len(vac.index) < 100 or len(vac.index) > 1000: # Sanity check
        raise ValueError('Vaccination data abnormally sized')
    return vac
# Generate wikitext
def generate(vac):
    text = ''
    for index, row in vac.iterrows():
        if row['location'] == 'World':
            row_text = '|{{pad|0.1em}}[[File:Emojione 1F310.svg|23x15px|alt=|link=]]{{pad|0.4em}}World{{efn|name=world-total}}'
        elif row['location'] in skipped_locations:
            continue
        else:
            row_text = '|{{flag+link|COVID-19 pandemic in|' + row['location'] + '}}'
        if not pd.isna(row['people_vaccinated']): # Use people vaccinated if available
            num = int(row['people_vaccinated'])
            row_text += '||' + '{:,}'.format(num) + '||' + str(round(row['people_vaccinated_per_hundred'], 1)) + '%'
            #row_text += '||' + f'{num:,}' + '||' + str(round(row['people_vaccinated_per_hundred'], 1)) + '%' # Py ≥ 3.6
        elif not pd.isna(row['total_vaccinations']): # Use total vaccinations if available
            num = int(row['total_vaccinations'])
            row_text += '{{efn|name=incorrect-total}}||{{font color|darkred|' + '{:,}'.format(num) + '}}||--'
            #row_text += '{{efn|name=incorrect-total}}||{{font color|darkred|' + f'{num:,}' + '}}||--' # Py ≥ 3.6
        elif not pd.isna(row['people_fully_vaccinated']): # Use people fully vaccinated if available
            num = int(row['people_fully_vaccinated'])
            row_text += '{{efn|This country\'s data reflects {{font color|darkorange|people fully vaccinated}}, not people vaccinated at least once.}}||{{font color|darkorange|' + '{:,}'.format(num) + '}}||--'
            #row_text += '{{efn|This country\'s data reflects {{font color|darkorange|people fully vaccinated}}, not people vaccinated at least once.}}||{{font color|darkorange|' + f'{num:,}' + '}}||--' # Py ≥ 3.6
        else: # Skip if no data
            continue
        row_text += '<tr>'
        text += row_text + '\n'
    return text
# Get current date
def get_date():
    date_obj = dt.utcnow()
    date_year = str(date_obj.year)
    date_month = date_obj.strftime('%B')
    date_day = str(date_obj.day)
    date = date_day + ' ' + date_month + ' ' + date_year
    return date
# Get wikitext from page
def parse(session):
    page_obj = session.get(api + api_par_parse + '&page=' + page)
    page_json = page_obj.json()
    page_wikitext = page_json['parse']['wikitext']['*']
    return page_wikitext
# Find and replace data with regex
def change(wikitext_old, vac_wikitext, date):
    repl_vac = regex_vac_prev + vac_wikitext + regex_vac_post
    repl_date = regex_date_prev + date + regex_date_post
    wikitext_vac = re.sub(regex_vac_str, repl_vac, wikitext_old, flags=re.DOTALL)
    wikitext_date = re.sub(regex_date_str, repl_date, wikitext_vac)
    return wikitext_date
# Edit the page with new data
def edit(session, wikitext_new):
    token_obj = session.get(api + api_par_edit_token)
    token_json = token_obj.json()
    token = token_json['query']['tokens']['csrftoken']
    params = {
        'title': page,
        'text': wikitext_new,
        'summary': edit_summary,
        'token': token
    }
    edit_obj = session.post(api + api_par_edit, data=params)
    edit_json = edit_obj.json()
    return edit_json
# Run
vac_wikitext = generate(clean(load()))
session = login()
wikitext_old = parse(session)
date = get_date()
wikitext_new = change(wikitext_old, vac_wikitext, date)
if run: result = edit(session, wikitext_new)
else: print(wikitext_new)
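The change() step above relies on the two HTML-comment markers defined near the top of the source: everything between them is swapped out in a single substitution. The following minimal, standalone sketch shows that substitution in isolation; the markers are copied from the source, while the template excerpt and the 'Examplestan' row are made up for illustration.

# Standalone sketch of the marker-based substitution used in change().
# Markers copied from the source above; sample wikitext and row are hypothetical.
import re

regex_vac_prev = '<!-- PASTE UPDATED DATA BELOW THIS LINE -->\n'
regex_vac_post = '\n<!-- UPDATED DATA ABOVE THIS LINE -->'
regex_vac_str = regex_vac_prev + r'(.*?)' + regex_vac_post

sample_wikitext = (
    '|+ Table header (hypothetical)\n'
    '<!-- PASTE UPDATED DATA BELOW THIS LINE -->\n'
    '|{{flag+link|COVID-19 pandemic in|Examplestan}}||1,000||0.5%<tr>\n'
    '<!-- UPDATED DATA ABOVE THIS LINE -->\n'
    '|}'
)

new_rows = '|{{flag+link|COVID-19 pandemic in|Examplestan}}||2,000||1.0%<tr>'
replacement = regex_vac_prev + new_rows + regex_vac_post

# re.DOTALL lets (.*?) match across the newlines between the markers,
# so the entire old data block is replaced in one pass.
print(re.sub(regex_vac_str, replacement, sample_wikitext, flags=re.DOTALL))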
Licensing
This work (all source code in this level 2 section) is licensed under:
- The Creative Commons Attribution-ShareAlike 3.0 (CC BY-SA 3.0) license
- The Creative Commons Attribution-ShareAlike 4.0 (CC BY-SA 4.0) license
- The GNU Lesser General Public License (LGPL), version 3 or any later version
- The GNU Free Documentation License (GFDL), version 1.3 or any later version, with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts
Version history
- 1.0.0-pre: Initial release. 17 April 2021; Python 3.7.
- 1.0.0: Fix sorting bug (report: w:en: diff #1018455365); add exception handling for initial variables not defined. 18 April 2021; Python 3.7.
- 1.0.1: Use .format() instead of f-strings for compatibility with Python < 3.6. 21 April 2021; Python 3.5–3.7.
- 1.1.0: Improve generate() by adding a fallback to people fully vaccinated. 7 May 2021; Python 3.5–3.7.
- 1.1.1: Add 'High income', 'Upper middle income', 'Lower middle income', and 'Low income' to the list of locations to skip. 14 May 2021; Python 3.5–3.7.