Add mypy checks.

mypy checks have been added to the gitlab pipeline.
They can be triggered via `make mypy`.

Some (minor) fixes to error checks are included.
......@@ -26,7 +26,7 @@ ima-venv:
make -C ${ima_home} venv
cd ${ima_home}; make test
cd ${ima_home}; make mypy; make test
archive: ima-venv
tar cvz --exclude=".*" -f ${gitlab_package_archive} .
......@@ -26,6 +26,11 @@ venv: ${pip_requirements}
conda install conda-pack; \
conda-pack -n ${venv} --format ${venv_archive_format}"
# Run the mypy static type checker on the `spark` directory.
#
# The recipe runs through ${DOCKER_CMD} in a single `bash -c` invocation:
# it sets CONDA_ALWAYS_YES, runs ${CONDA_CMD} (defined elsewhere in this
# Makefile), installs the test requirements with pip, then invokes mypy.
#
# `pip install` and `mypy` are chained with `&&` so that a failed install
# aborts the recipe instead of running mypy against an incomplete
# environment. The `;` after ${CONDA_CMD} is kept as-is because its
# expansion is not visible here -- NOTE(review): consider `&&` there too
# once confirmed that ${CONDA_CMD} is a single command that exits 0.
#
# The target does not produce a file named `mypy`, so it is declared phony
# to keep a stray file or directory of that name from suppressing the check.
.PHONY: mypy
mypy: ${pip_requirements_test}
	${DOCKER_CMD} bash -c "export CONDA_ALWAYS_YES=true; ${CONDA_CMD}; \
	pip install -r ${pip_requirements_test} && \
	mypy spark"
test: ${pip_requirements_test}
${DOCKER_CMD} bash -c "export CONDA_ALWAYS_YES=true; ${CONDA_CMD}; \
conda install openjdk pyspark==${pyspark_version}; \
\ No newline at end of file
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from schema import CsvDataset
from .schema import CsvDataset
import argparse
......@@ -29,7 +29,7 @@ if __name__ == "__main__":
csv_df = (
spark.read.options(delimiter="\t", header=False, escape='"')
spark.read.options(delimiter="\t", header="false", escape='"')
......@@ -2,8 +2,8 @@ from pyspark.sql import SparkSession
from pyspark.sql import Column, DataFrame
from pyspark.sql import functions as F
from pyspark.sql.types import IntegerType
from schema import RawDataset
from instances_to_filter import InstancesToFilter
from .schema import RawDataset
from .instances_to_filter import InstancesToFilter
import argparse
import uuid
