Commit 1474e46c authored by Diegodlh
Browse files

Specified different selection and transformation step types separately

parent 0bf2edbd
......@@ -2,7 +2,7 @@
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://gitlab.wikimedia.org/diegodlh/w2c-core/-/blob/main/templates.schema.json",
"title": "Domain translation templates configuration for Web2Cit",
"description": "A list of translation templates",
"description": "A list of translation templates. See https://meta.wikimedia.org/wiki/Web2Cit/Early_adopters#templates.json",
"type": "array",
"items": {
"title": "Translation template",
......@@ -61,27 +61,47 @@
"properties": {
"selections": {
"title": "Selection",
"description": "A list of selection steps; the combined output of all selection steps is given as input to the first transformation step below",
"description": "A list of selection steps (see https://meta.wikimedia.org/wiki/Web2Cit/Early_adopters#Selection_types_and_configs); the combined output of all selection steps is given as input to the first transformation step below",
"default": [],
"type": "array",
"items": {
"title": "Selection step",
"description": "A selection step selects and extracts individual elements from the target webpage",
"oneOf": [
{
"title": "Citoid selection",
"description": "Selects a field from the Citoid response for the target webpage",
"type": "object",
"properties": {
"type": {
"title": "Type",
"description": "The specific selection step type (see https://meta.wikimedia.org/wiki/Web2Cit/Early_adopters#Selection_types_and_configs)",
"type": "string",
"enum": [
"citoid",
"xpath",
"fixed"
"enum": ["citoid"]
},
"config": {
"title": "Configuration",
"description": "Any valid Citoid/Zotero base field name; creators (e.g., \"author\") are split into creatorFirst and creatorLast. Use the \"citation\" endpoint of Wikimedia REST API (format \"mediawiki-basefields\") to check what Citoid returns for the target webpage: https://en.wikipedia.org/api/rest_v1/#/Citation/getCitation.",
"type": "string"
}
},
"required": [
"type",
"config"
]
},
{
"title": "XPath selection",
"description": "Selects a node from the target webpage's HTML using XPath",
"type": "object",
"properties": {
"type": {
"title": "Type",
"type": "string",
"enum": ["xpath"]
},
"config": {
"title": "Configuration",
"description": "Selection step configuration; each selection type supports different configuration values (see https://meta.wikimedia.org/wiki/Web2Cit/Early_adopters#Selection_types_and_configs)",
"description": "Any valid XPath expression. Your browser's inspector may help you get one.",
"type": "string"
}
},
......@@ -89,38 +109,177 @@
"type",
"config"
]
},
{
"title": "Fixed selection",
"description": "Always returns the same predefined value",
"type": "object",
"properties": {
"type": {
"title": "Type",
"type": "string",
"enum": ["fixed"]
},
"config": {
"title": "Configuration",
"description": "The predefined value to be returned.",
"type": "string"
}
},
"required": [
"type",
"config"
]
}
]
}
},
"transformations": {
"title": "Transformation",
"description": "A list of transformation steps to be applied one after the other; the output of the last transformation step is the procedure's output",
"description": "A list of transformation steps to be applied one after the other (see https://meta.wikimedia.org/wiki/Web2Cit/Early_adopters#Transformation_types_and_configs); the output of the last transformation step is the procedure's output",
"type": "array",
"items": {
"title": "Transformation step",
"description": "Transformation steps transform selected elements (if needed) to return the expected output",
"oneOf": [
{
"title": "Join transformation",
"description": "Joins two or more items in a list into one, using the separator specified",
"type": "object",
"properties": {
"type": {
"title": "Type",
"type": "string",
"enum": ["join"]
},
"config": {
"title": "Configuration",
"description": "The separator to use.",
"type": "string"
},
"itemwise": {
"$ref": "#/definitions/itemwise",
"default": false
}
},
"required": [
"type",
"config",
"itemwise"
]
},
{
"title": "Split transformation",
"description": "Splits a string at the separator specified into two or more substrings",
"type": "object",
"properties": {
"type": {
"title": "Type",
"type": "string",
"enum": ["split"]
},
"config": {
"title": "Configuration",
"description": "The separator to use.",
"type": "string"
},
"itemwise": {
"$ref": "#/definitions/itemwise",
"default": true
}
},
"required": [
"type",
"config",
"itemwise"
]
},
{
"title": "Date transformation",
"description": "Uses the Sugar.js to try and parse natural language dates into the YYYY-MM-DD format",
"type": "object",
"properties": {
"type": {
"title": "Type",
"description": "The specific transformation step type (see https://meta.wikimedia.org/wiki/Web2Cit/Early_adopters#Transformation_types_and_configs)",
"type": "string",
"enum": ["date"]
},
"config": {
"title": "Configuration",
"description": "Any of the currently supported locales",
"type": "string",
"enum": [
"join",
"split",
"date",
"range",
"match"
"ca",
"da",
"de",
"en",
"es",
"fi",
"fr",
"it",
"ja",
"ko",
"nl",
"no",
"pl",
"pt",
"ru",
"sv",
"zh-CN",
"zh-TW"
],
"options": {
"enum_titles": [
"Catalan (ca)",
"Danish (da)",
"German (de)",
"English (en)",
"Spanish (es)",
"Finnish (fi)",
"French (fr)",
"Italian (it)",
"Japanese (ja)",
"Korean (ko)",
"Dutch (nl)",
"Norwegian (no)",
"Polish (pl)",
"Portuguese (pt)",
"Russian (ru)",
"Swedish (sv)",
"Chinese (zh-CN)",
"Chinese (zh-TW)"
]
}
},
"itemwise": {
"$ref": "#/definitions/itemwise",
"default": true
}
},
"required": [
"type",
"config",
"itemwise"
]
},
{
"title": "Range transformation",
"description": "Returns one or more items or ranges of items in the order specified",
"type": "object",
"properties": {
"type": {
"title": "Type",
"type": "string",
"enum": ["range"]
},
"config": {
"title": "Configuration",
"description": "Transformation step configuration; each transformation type supports different configuration values (see https://meta.wikimedia.org/wiki/Web2Cit/Early_adopters#Transformation_types_and_configs)",
"description": "One or more zero-based comma-separated ranges: \"start(:end)\", \"start:\" or \":end\".",
"type": "string"
},
"itemwise": {
"title": "Item-wise",
"description": "Whether transformation should be applied to each item of the input independently (true), or to the entire input as a whole (false)",
"type": "boolean"
"$ref": "#/definitions/itemwise",
"default": false
}
},
"required": [
......@@ -128,6 +287,34 @@
"config",
"itemwise"
]
},
{
"title": "Match transformation",
"description": "Returns one or more substrings matching a target",
"type": "object",
"properties": {
"type": {
"title": "Type",
"type": "string",
"enum": ["match"]
},
"config": {
"title": "Configuration",
"description": "The matching target, expressed as either plain string or /regular expression/ (see https://meta.wikimedia.org/wiki/Web2Cit/Early_adopters#Match_transformation).",
"type": "string"
},
"itemwise": {
"$ref": "#/definitions/itemwise",
"default": true
}
},
"required": [
"type",
"config",
"itemwise"
]
}
]
}
}
},
......@@ -150,5 +337,12 @@
"path",
"fields"
]
},
"definitions": {
"itemwise": {
"title": "Item-wise",
"description": "Whether transformation should be applied to each item of the input independently (true), or to the entire input as a whole (false)",
"type": "boolean"
}
}
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment