Commit c1973d15 authored by Ottomata's avatar Ottomata
Browse files

Remove docopt as dep in pyspark_tester.py

parent fd0998d2
Pipeline #4148 skipped with stage
......@@ -36,8 +36,21 @@ Submit using conda env python but provided spark2-submit
`REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt PYSPARK_DRIVER_PYTHON=example-job-project-0.16.0.dev0.conda/bin/python PYSPARK_PYTHON=venv/bin/python spark2-submit --master yarn --archives example-job-project-0.16.0.dev0.conda.tgz#venv example-job-project-0.16.0.dev0.conda/bin/pyspark_tester.py`
/usr/lib/spark2/bin/spark-submit --driver-java-options "-Dhttp.proxyHost=http://webproxy.eqiad.wmnet
-Dhttp.proxyPort=8080 -Dhttps.proxyHost=http://webproxy.eqiad.wmnet -Dhttps.proxyPort=8080"
--master yarn --conf spark.yarn.maxAppAttempts=1 --conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=venv/bin/python
--conf spark.yarn.appMasterEnv.PYSPARK_DRIVER_PYTHON=venv/bin/python --conf spark.yarn.appMasterEnv.REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
--archives hdfs:///user/otto/example-job-project-0.16.0.dev0.conda.tgz#venv --name
spark_conda_test_dag__pyspark_tester_provided_spark2__20220101 --deploy-mode client
venv/bin/pyspark_tester.py
Submit scala/java job using conda env spark-submit
`REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt SPARK_CONF_DIR=~/spark3.conf PYSPARK_DRIVER_PYTHON=example-job-project-0.16.0.dev0.conda/bin/python PYSPARK_PYTHON=venv/bin/python ./example-job-project-0.16.0.dev0.conda/bin/spark-submit --master yarn --archives example-job-project-0.16.0.dev0.conda.tgz#venv --class org.apache.spark.examples.SparkPi spark-examples_2.12-3.0.1.jar 100`
What to do about spark conf for pyspark in a conda env?
\ No newline at end of file
......@@ -8,10 +8,7 @@ depending on the arguments you provide to spark submit.
from typing import Optional, List
import os
import time
import sys
from docopt import docopt
from pprint import pprint
......@@ -50,18 +47,12 @@ def do_wmf_http_requests() -> List[str]:
def main(argv: Optional[List[str]] = None) -> None:
if argv is None:
argv = sys.argv[1:]
script_name = os.path.basename(sys.argv[0])
doc = f"""
Usage: {script_name}
def main() -> None:
"""
Usage: spark-submit [...] pyspark_tester.py
Runs various pyspark tests to verify spark deployments in in different clusters.
"""
args = docopt(doc, argv)
from pyspark.sql import SparkSession
spark = SparkSession.builder.getOrCreate()
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment