Commit 2b0c8f63 authored by Miriam Redi's avatar Miriam Redi Committed by GitHub
Browse files

Merge pull request #6 from mirrys/T275685-automate-pytest

T275685 automate pytest
parents 38c7167a f9a1c746
# CI workflow: lint with flake8, install Spark, run pytest.
# NOTE(review): structural keys (jobs/strategy/matrix/steps/with) and all
# indentation were lost in the paste; reconstructed below from the visible
# step lines — confirm against the repository's .github/workflows file.
name: build

on: [push]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      max-parallel: 4
      matrix:
        # quoted to avoid YAML float coercion; trailing empty entry dropped
        python-version: ["3.7"]

    steps:
      - uses: actions/checkout@v1

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v1
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          make venv

      - name: Lint python files with flake8
        run: |
          make flake8

      # Scala/JVM toolchain needed by Spark
      - uses: olafurpg/setup-scala@v10
        with:
          java-version: adopt@1.8

      - name: Install Apache Spark
        run: |
          # This command will install vanilla spark under ./spark-2.4.7-bin-hadoop2.7
          make install_spark

      - name: Test with pytest
        run: |
          export SPARK_HOME=$(pwd)/spark-2.4.7-bin-hadoop2.7
          # NOTE(review): PYTHONPATH usually also needs the py4j zip under
          # ${SPARK_HOME}/python/lib/ — confirm this path is sufficient
          export PYTHONPATH=${SPARK_HOME}/python:${SPARK_HOME}/python/lib/${PYTHONPATH}
          export PATH=${PATH}:${SPARK_HOME}/bin:${SPARK_HOME}/sbin
          make test
# Build helpers for the ImageMatching project: virtualenv setup, flake8
# linting, Spark installation, and the pytest suite.
spark_version := 2.4.7
hadoop_version := 2.7
spark_home := spark-${spark_version}-bin-hadoop${hadoop_version}
# NOTE(review): the base URL was truncated in the paste; the Apache archive
# is the canonical source for old Spark releases — confirm against the repo.
spark_tgz_url := https://archive.apache.org/dist/spark/spark-${spark_version}/${spark_home}.tgz

.PHONY: install_spark clean_spark flake8 test

# Create the virtualenv and install pinned dependencies.
# (The diff replaced the virtualenv-based recipe with the stdlib venv module;
# only the post-change recipe is kept here.)
venv: requirements.txt
	test -d venv || python3 -m venv venv
	. venv/bin/activate; pip3 install -Ur requirements.txt;

# Download and unpack vanilla Spark under ./${spark_home} (idempotent).
install_spark:
	test -d ${spark_home} || (wget ${spark_tgz_url}; tar -xzvf ${spark_home}.tgz)

# Remove the unpacked Spark distribution and its tarball.
clean_spark:
	rm -r ${spark_home}; rm -rf ${spark_home}.tgz

flake8: venv
	# stop the build if there are Python syntax errors or undefined names in *.py files
	. venv/bin/activate; flake8 *.py etl/ tests/ --count --select=E9,F63,F7,F82 --show-source --statistics
	# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
	. venv/bin/activate; flake8 *.py etl/ tests/ --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics

# Run the pytest suite with coverage over etl/ (post-change recipe keeps the
# explicit tests/ path added by the diff).
test: venv
	. venv/bin/activate; pytest --cov etl tests/
# ImageMatching
Image recommendation for unillustrated Wikipedia articles
from pyspark.sql import SparkSession
from pyspark.sql.types import ArrayType, StructType, StringType, DoubleType, IntegerType
from pyspark.sql.types import StructType, StringType, IntegerType
from pyspark.sql import Column, DataFrame
from pyspark.sql import functions as F
from etl.transform import RawDataset, ImageRecommendation
from etl.transform import ImageRecommendation
def test_etl(raw_data):
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment