Verified commit 9114c9c4, authored by Gergő Tisza
Browse files

Fix namespace handling

Only remove Template: namespace prefix, not e.g. modules.
parent 8c5acf51
......@@ -16,9 +16,19 @@ import mwapi
# helpers
def strip_ns(title: str) -> str:
    """Return the part of *title* after its first colon.

    Titles without a colon are returned unchanged. The prefix is dropped
    regardless of what it is, so any ``Something:`` prefix is removed.
    """
    return title.split(':', 1)[-1]
# Name of the Template namespace; it starts out as None and has to be
# assigned a string before is_template() is called.
template_ns_name = None


def is_template(title: str) -> bool:
    """Check if a title is in the Template namespace.

    The check is a plain prefix test against ``template_ns_name + ':'``.
    While ``template_ns_name`` is still ``None`` the concatenation raises
    ``TypeError``, so the global must be initialised first.
    """
    return title.startswith(template_ns_name + ':')
def strip_ns(title: str) -> str:
    """Remove Template namespace prefix but only if the result is unambiguous.

    Titles that are not in the Template namespace (as decided by
    ``is_template``) are returned unchanged. Template titles lose everything
    up to and including the first colon, unless the remainder still contains
    a colon, in which case the full title is returned.
    """
    if not is_template(title):
        return title
    short_title = title.split(':', 1)[-1]
    # A colon left in the short title would read like another prefix, so the
    # full title is kept in that case.
    return title if ':' in short_title else short_title
# globals
......@@ -35,10 +45,23 @@ parser.add_argument('--format', '-f', choices = ['search', 'links', 'json'],
default = 'search', help = 'template list format')
args = parser.parse_args()
# fetch infobox templates by looking at wikidata instance_of:Wikimedia_infobox properties
# load global data
ua = 'one-off/gtisza@wikimedia.org'
lang = args.wiki_language
wikipedia_session = mwapi.Session('https://%s.wikipedia.org/w/api.php' % lang, ua)

# query the wiki's namespace table so the Template namespace name is taken
# from the wiki itself rather than hard-coded
siteinfo_params = {
    'formatversion': 2,
    'action': 'query',
    'meta': 'siteinfo',
    'siprop': 'namespaces',
}
data = wikipedia_session.get(**siteinfo_params)
# the script treats the entry keyed '10' as the Template namespace
template_ns_info = data['query']['namespaces']['10']
template_ns_name = template_ns_info['name']
# fetch infobox templates by looking at wikidata instance_of:Wikimedia_infobox properties
query = """
select distinct ?itemLabel where {
?item wdt:P31 wd:Q19887878 .
......@@ -111,6 +134,8 @@ misc_infobox_templates = set(all_infobox_templates) - set(derived_infobox_templa
# expand templates to see the transclusion tree without <includeonly> messing it up
for tpl in set(misc_infobox_templates):
if not is_template(tpl):
continue
data = wikipedia_session.get(
action = 'parse',
formatversion = 2,
......@@ -121,12 +146,12 @@ for tpl in set(misc_infobox_templates):
for page in data['parse'].get('templates', []):
if page['title'] in infobox_building_block_templates:
misc_infobox_templates.discard(tpl)
continue
break
# done: use building blocks + non-building-block-based Wikidata-tracked infoboxes
search_infobox_templates = misc_infobox_templates | set(infobox_building_block_templates)
# plain locale-aware ordering of the full titles; because sorted() is stable
# this also decides the order of entries that compare equal below
search_infobox_templates = sorted(search_infobox_templates, key=locale.strxfrm)
# final order: Template-namespace pages first (False sorts before True), then
# locale-aware by the prefix-stripped title. The key must test its own
# argument `t`, not the leftover loop variable `tpl`.
search_infobox_templates = sorted(
    search_infobox_templates,
    key=lambda t: (not is_template(t), locale.strxfrm(strip_ns(t))),
)
# output
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment