diff --git a/analytics/dags/wikidata/wikidata_metrics_to_graphite_daily_dag.py b/analytics/dags/wikidata/wikidata_metrics_to_graphite_daily_dag.py index c66d722fab7ce0a99cb3de11b8466823b20fb267..d5221048e056b3b0d08e0eba2f20736cd1798dda 100644 --- a/analytics/dags/wikidata/wikidata_metrics_to_graphite_daily_dag.py +++ b/analytics/dags/wikidata/wikidata_metrics_to_graphite_daily_dag.py @@ -12,7 +12,7 @@ ### Metrics include: * ArticlePlaceholder metrics - - Metrics can be viewed in Graphite under the daily.wikidata.articleplaceholder namespace, + Metrics can be viewed in Graphite under the daily.wikidata.articleplaceholder namespace, in varnish_requests.abouttopic.* folder * Reliability metrics - metrics for the Wikidata reliability graphs @@ -66,13 +66,13 @@ with DAG( #define common arguments for the 3 etl tasks hive_to_graphite_common_args = [ # Graphite parameters - '--graphite_host', 'graphite-in.eqiad.wmnet', + '--graphite_host', 'graphite-in.eqiad.wmnet', '--graphite_port', 2003, - # HQL parameters - '-d', f'webrequest_table={source_table}', - '-d', f'year={year}', - '-d', f'month={month}', - '-d', f'day={day}', + # HQL parameters + '-d', f'webrequest_table={source_table}', + '-d', f'year={year}', + '-d', f'month={month}', + '-d', f'day={day}', '-d', 'coalesce_partitions=4' ] @@ -94,9 +94,12 @@ with DAG( application=var_props.get('refinery_job_jar', dag_config.artifact('refinery-job-0.2.1-shaded.jar')), java_class='org.wikimedia.analytics.refinery.job.HiveToGraphite', + # This job generates too many YARN logs for the skein client. + # Preventing skein from collecting logs avoids issues. 
+ skein_app_log_collection_enabled=False, application_args=[ '-f', var_props.get('query_file2', f'{dag_config.hql_directory}/wikidata/wikidata_reliability_metrics.hql'), - '--metric_prefix', var_props.get('metric_prefix2', 'daily.wikidata.reliability_metrics'), + '--metric_prefix', var_props.get('metric_prefix2', 'daily.wikidata.reliability_metrics'), ]+hive_to_graphite_common_args, conf={'spark.dynamicAllocation.maxExecutors': 128}, sla=timedelta(hours=6) @@ -107,6 +110,9 @@ with DAG( application=var_props.get('refinery_job_jar', dag_config.artifact('refinery-job-0.2.1-shaded.jar')), java_class='org.wikimedia.analytics.refinery.job.HiveToGraphite', + # This job generates too many YARN logs for the skein client. + # Preventing skein from collecting logs avoids issues. + skein_app_log_collection_enabled=False, application_args=[ '-f', var_props.get('query_file3', f'{dag_config.hql_directory}/wikidata/wikidata_specialentity_data_metrics.hql'), '--metric_prefix', var_props.get('metric_prefix3', 'daily.wikidata.entitydata'),