Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Gergő Tisza
infobox-templates
Commits
9114c9c4
Verified
Commit
9114c9c4
authored
Dec 02, 2021
by
Gergő Tisza
Browse files
Fix namespace handling
Only remove Template: namespace prefix, not e.g. modules.
parent
8c5acf51
Changes
1
Hide whitespace changes
Inline
Side-by-side
infobox-templates.py
View file @
9114c9c4
...
...
@@ -16,9 +16,19 @@ import mwapi
# helpers
def strip_ns(title: str) -> str:
    """Return ``title`` with everything up to and including its first colon removed.

    The first colon is treated as the namespace separator no matter which
    namespace precedes it, so ``Module:Foo`` becomes ``Foo`` just like
    ``Template:Foo`` does. A title without a colon is returned unchanged.

    NOTE(review): ``strip_ns`` is defined a second time further down in this
    file; that later definition rebinds the name and is the one in effect
    once the module has finished loading.
    """
    # maxsplit=1 keeps later colons in the page name intact; [-1] yields the
    # whole title when there is no colon to split on.
    return title.split(':', 1)[-1]
# Local name of the wiki's Template namespace. Starts out as None and is
# assigned from the wiki's siteinfo response later in the script.
template_ns_name = None


def is_template(title: str) -> bool:
    """Check if a title is in the Template namespace.

    The check is a plain prefix match against the module-level
    ``template_ns_name`` followed by a colon.

    Raises:
        TypeError: if called while ``template_ns_name`` is still ``None``
            (the string concatenation below fails).
    """
    return title.startswith(template_ns_name + ':')
def strip_ns(title: str) -> str:
    """Remove Template namespace prefix but only if the result is unambiguous.

    Titles for which ``is_template`` is false are returned unchanged, so
    prefixes of other namespaces are kept. A Template title whose remainder
    still contains a colon is also returned in full, because the shortened
    form could be read as carrying a different namespace prefix.
    """
    if not is_template(title):
        return title
    # is_template() guarantees a colon is present, so this drops exactly the
    # namespace prefix and keeps the rest of the title.
    short_title = title.split(':', 1)[-1]
    if ':' in short_title:
        return title
    return short_title
# globals
...
...
@@ -35,10 +45,23 @@ parser.add_argument('--format', '-f', choices = ['search', 'links', 'json'],
default
=
'search'
,
help
=
'template list format'
)
args = parser.parse_args()

# load global data
ua = 'one-off/gtisza@wikimedia.org'
lang = args.wiki_language
wikipedia_session = mwapi.Session('https://%s.wikipedia.org/w/api.php' % lang, ua)
# Ask the wiki for its namespace table so the Template namespace can be
# recognized by its local name rather than a hard-coded English one.
data = wikipedia_session.get(
    formatversion=2,
    action='query',
    meta='siteinfo',
    siprop='namespaces',
)
# The namespaces mapping is keyed by namespace id as a string; id 10 is the
# Template namespace in MediaWiki.
template_ns_name = data['query']['namespaces']['10']['name']
# fetch infobox templates by looking at wikidata instance_of:Wikimedia_infobox properties
query
=
"""
select distinct ?itemLabel where {
?item wdt:P31 wd:Q19887878 .
...
...
@@ -111,6 +134,8 @@ misc_infobox_templates = set(all_infobox_templates) - set(derived_infobox_templa
# expand templates to see the transclusion tree without <includeonly> messing it up
for
tpl
in
set
(
misc_infobox_templates
):
if
not
is_template
(
tpl
):
continue
data
=
wikipedia_session
.
get
(
action
=
'parse'
,
formatversion
=
2
,
...
...
@@ -121,12 +146,12 @@ for tpl in set(misc_infobox_templates):
# Look through the 'templates' list of the parse result for the current tpl
# (an absent key is treated as an empty list).
for page in data['parse'].get('templates', []):
    if page['title'] in infobox_building_block_templates:
        # tpl uses a building-block template, so it is not a "misc" infobox.
        misc_infobox_templates.discard(tpl)
        # One match settles it; scanning the remaining entries could only
        # repeat the same (idempotent) discard.
        break
# done: use building blocks + non-building-block-based Wikidata-tracked infoboxes
search_infobox_templates = misc_infobox_templates | set(infobox_building_block_templates)
# Order Template-namespace titles first (False sorts before True), and within
# each group use locale-aware collation of the title as strip_ns() shortens it.
# The key must test its own argument `t`; the loop variable `tpl` left over
# from the expansion loop above would give every title the same group.
search_infobox_templates = sorted(
    search_infobox_templates,
    key=lambda t: (not is_template(t), locale.strxfrm(strip_ns(t))),
)
# output
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment