# test_etl.py
from etl.transform import RawDataset, ImageRecommendation


def test_etl(raw_data):
    """Smoke-test ``ImageRecommendation(raw_data).transform()``.

    Verifies three things:

    * the input ``raw_data`` holds exactly 2 records;
    * the transformed dataset exposes no column outside the expected
      output schema (a subset check -- a missing expected column does
      not fail this test);
    * the transformed dataset still holds 2 records.

    Args:
        raw_data: input dataset exposing ``count()``. Presumably a pytest
            fixture defined elsewhere (e.g. conftest.py) -- TODO confirm.
    """
    assert raw_data.count() == 2

    # NOTE(review): ``ddf`` looks like a DataFrame-style object
    # (``columns`` / ``count()``); confirm against etl.transform.
    ddf = ImageRecommendation(raw_data).transform()

    expected_columns = {
        "wiki",
        "page_id",
        "page_title",
        "image_id",
        "confidence_rating",
        "source",
    }
    # Only columns *outside* the expected schema are treated as an error.
    unexpected_columns = set(ddf.columns) - expected_columns
    assert not unexpected_columns, f"unexpected columns: {sorted(unexpected_columns)}"

    expected_num_records = 2
    assert ddf.count() == expected_num_records