Adds a worker script for running quality control jobs. (#562)
parent ea3e7397ac
commit 8895bfc96a
@@ -72,7 +72,7 @@ COPY scripts/ros_entrypoint.sh /
 RUN rm -rf cartographer_ros catkin_ws || true
 
 RUN sudo apt-get update
-RUN sudo apt-get -y install openjdk-8-jdk
+RUN sudo apt-get -y install openjdk-8-jdk python-pip
 
 ENV HOME /home/jenkins
 RUN addgroup --system --gid 10000 jenkins
@@ -105,5 +105,9 @@ USER root
 RUN curl https://sdk.cloud.google.com | bash && mv google-cloud-sdk /opt
 RUN gcloud components install kubectl
 
+RUN pip install --upgrade google-cloud-datastore
+RUN pip install --upgrade google-cloud-bigquery
+COPY jenkins/worker.py /worker.py
+
 # USER root
 ENTRYPOINT ["jenkins-slave"]
--- /dev/null
+++ b/jenkins/worker.py
@@ -0,0 +1,247 @@
+"""This is the script executed by workers of the quality control pipeline."""
+
+import argparse
+import datetime
+import json
+from os.path import basename
+from pprint import pprint
+import re
+import subprocess
+
+from google.cloud import bigquery
+from google.cloud import datastore
+
+
+class Pattern(object):
+  """Defines a pattern for regular expression matching."""
+
+  def __init__(self, pattern):
+    self.regex = re.compile(pattern, re.MULTILINE)
+
+  def extract(self, inp):
+    """Returns a dictionary of named capture groups to extracted output.
+
+    Args:
+      inp: input to parse
+
+    Returns an empty dict if no match was found.
+    """
+    match = self.regex.search(inp)
+    if match is None:
+      return {}
+    return match.groupdict()
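+
+# For example, Pattern(r'(?P<num>\d+)').extract('job 42') returns
+# {'num': '42'}, and an input without a match yields {}.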
+
+
+# Pattern matchers for the various fields of the '/usr/bin/time -v' output.
+USER_TIME_PATTERN = Pattern(
+    r'^\s*User time \(seconds\): (?P<user_time>\d+\.\d+|\d+)')
+SYSTEM_TIME_PATTERN = Pattern(
+    r'^\s*System time \(seconds\): (?P<system_time>\d+\.\d+|\d+)')
+WALL_TIME_PATTERN = Pattern(
+    r'^\s*Elapsed \(wall clock\) time \(h:mm:ss or m:ss\): '
+    r'((?P<hours>\d{1,2}):|)(?P<minutes>\d{1,2}):(?P<seconds>\d{2}\.\d{2})')
+MAX_RES_SET_SIZE_PATTERN = Pattern(
+    r'^\s*Maximum resident set size \(kbytes\): (?P<max_set_size>\d+)')
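+
+# These match '/usr/bin/time -v' output lines such as, for example:
+#   User time (seconds): 123.45
+#   Elapsed (wall clock) time (h:mm:ss or m:ss): 2:03.45
+#   Maximum resident set size (kbytes): 1048576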
+
+# Pattern matcher for extracting the HEAD commit SHA-1 hash.
+GIT_SHA1_PATTERN = Pattern(r'^(?P<sha1>[0-9a-f]{40})\s+HEAD')
+
+
+def get_head_git_sha1():
+  """Returns the SHA-1 hash of the commit tagged HEAD."""
+  output = subprocess.check_output([
+      'git', 'ls-remote',
+      'https://github.com/googlecartographer/cartographer.git'
+  ])
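+  # Each 'git ls-remote' output line has the form '<40-hex-sha1>\t<ref>';
+  # GIT_SHA1_PATTERN picks out the line whose ref is HEAD.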
+  parsed = GIT_SHA1_PATTERN.extract(output)
+  return parsed['sha1']
+
+
+def extract_stats(inp):
+  """Returns a dictionary of stats."""
+  result = {}
+
+  parsed = USER_TIME_PATTERN.extract(inp)
+  result['user_time_secs'] = float(parsed['user_time'])
+
+  parsed = SYSTEM_TIME_PATTERN.extract(inp)
+  result['system_time_secs'] = float(parsed['system_time'])
+
+  parsed = WALL_TIME_PATTERN.extract(inp)
+  result['wall_time_secs'] = float(parsed['hours'] or 0.) * 3600 + float(
+      parsed['minutes']) * 60 + float(parsed['seconds'])
+
+  parsed = MAX_RES_SET_SIZE_PATTERN.extract(inp)
+  result['max_set_size_kbytes'] = int(parsed['max_set_size'])
+
+  return result
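+
+# For example, a wall clock value of '1:23.45' has no hours group, so
+# wall_time_secs = 0 * 3600 + 1 * 60 + 23.45 = 83.45.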
+
+
+def retrieve_entity(datastore_client, kind, identifier):
+  """Convenience function for Datastore entity retrieval."""
+  key = datastore_client.key(kind, identifier)
+  return datastore_client.get(key)
+
+
+def create_job_selector(worker_id, num_workers):
+  """Constructs a round-robin job selector."""
+  return lambda job_id: job_id % num_workers == worker_id
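+
+# For example, create_job_selector(1, 3) selects job indices 1, 4, 7, ...,
+# partitioning the jobs round-robin across the workers.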
+
+
+def run_cmd(cmd):
+  """Runs a command, printing its stdout and returning it as a string."""
+  p = subprocess.Popen(
+      cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+  run_cmd.output = []
+
+  def process(line):
+    run_cmd.output.append(line)
+    print line.rstrip()
+
+  while p.poll() is None:
+    process(p.stdout.readline())
+  process(p.stdout.read())
+  return '\n'.join(run_cmd.output)
+
+
+class Job(object):
+  """Represents a single job to be executed.
+
+  A job consists of a rosbag together with configuration and launch files.
+  """
+
+  def __init__(self, datastore_client, job_id):
+    self.id = job_id
+    entity = retrieve_entity(datastore_client, 'Job', job_id)
+    self.launch_file = entity['launch_file']
+    self.assets_writer_launch_file = entity['assets_writer_launch_file']
+    self.assets_writer_config_file = entity['assets_writer_config_file']
+    self.rosbag = entity['rosbag']
+
+  def __repr__(self):
+    return 'Job: id: {} launch_file: {} rosbag: {}'.format(
+        self.id, self.launch_file, self.rosbag)
+
+  def run(self, ros_distro, run_id):
+    """Runs the job with ROS distro 'ros_distro'."""
+    print 'running job {}'.format(self.id)
+    # Copies the rosbag to scratch space.
+    scratch_dir = '/data/{}'.format(self.id)
+    rosbag_filename = basename(self.rosbag)
+    run_cmd('mkdir {}'.format(scratch_dir))
+    run_cmd('gsutil cp gs://{} {}/{}'.format(self.rosbag, scratch_dir,
+                                             rosbag_filename))
+
+    # Creates the pbstream.
+    output = run_cmd(
+        '/bin/bash -c \"source /opt/ros/{}/setup.bash && source '
+        '/opt/cartographer_ros/setup.bash && /usr/bin/time -v roslaunch '
+        'cartographer_ros {} bag_filenames:={}/{} no_rviz:=true\"'.format(
+            ros_distro, self.launch_file, scratch_dir, rosbag_filename))
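+    # With e.g. launch_file='my_robot.launch' (illustrative), the command
+    # above runs: /usr/bin/time -v roslaunch cartographer_ros my_robot.launch
+    # bag_filenames:=/data/<id>/my.bag no_rviz:=true, inside a bash shell
+    # with both ROS environments sourced.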
+
+    # Creates assets.
+    run_cmd('/bin/bash -c \"source /opt/ros/{}/setup.bash && source '
+            '/opt/cartographer_ros/setup.bash && /usr/bin/time -v roslaunch '
+            'cartographer_ros {} bag_filenames:={}/{} '
+            'pose_graph_filename:={}/{}.pbstream config_file:={}\"'.format(
+                ros_distro, self.assets_writer_launch_file, scratch_dir,
+                rosbag_filename, scratch_dir, rosbag_filename,
+                self.assets_writer_config_file))
+
+    # Copies assets to bucket.
+    run_cmd('gsutil cp {}/{}.pbstream '
+            'gs://cartographer-ci-artifacts/{}/{}/{}.pbstream'.format(
+                scratch_dir, rosbag_filename, run_id, self.id,
+                rosbag_filename))
+    run_cmd('gsutil cp {}/{}_* gs://cartographer-ci-artifacts/{}/{}/'.format(
+        scratch_dir, rosbag_filename, run_id, self.id))
+
+    info = extract_stats(output)
+    info['rosbag'] = rosbag_filename
+    return info
+
+
+class Worker(object):
+  """Represents a single worker that executes a sequence of Jobs."""
+
+  def __init__(self, datastore_client, pipeline_id, run_id):
+    entity = retrieve_entity(datastore_client, 'PipelineConfig', pipeline_id)
+    self.pipeline_id = pipeline_id
+    self.jobs = [Job(datastore_client, job_id) for job_id in entity['jobs']]
+    self.scratch_dir = entity['scratch_dir']
+    self.ros_distro = entity['ros_distro']
+    self.run_id = run_id
+
+  def __repr__(self):
+    result = 'Worker: pipeline_id: {}\n'.format(self.pipeline_id)
+    for job in self.jobs:
+      result += '{}\n'.format(str(job))
+    return result
+
+  def run_jobs(self, selector):
+    outputs = {}
+    for idx, job in enumerate(self.jobs):
+      if selector(idx):
+        output = job.run(self.ros_distro, self.run_id)
+        outputs[job.id] = output
+      else:
+        print 'job {}: skip'.format(job.id)
+    return outputs
+
+
+def publish_stats_to_big_query(stats_dict, now, head_sha1):
+  """Publishes metrics to BigQuery."""
+  bigquery_client = bigquery.Client()
+  dataset = bigquery_client.dataset('Cartographer')
+  table = dataset.table('metrics')
+  rows = []
+  for job_identifier, job_info in stats_dict.iteritems():
+    data_string = """[
+        \"{}-{}-{}\",
+        \"{}\",
+        {},
+        \"{}\",
+        {},
+        {},
+        {},
+        {}
+    ]""".format(now.year, now.month, now.day, head_sha1, job_identifier,
+                job_info['rosbag'], job_info['user_time_secs'],
+                job_info['system_time_secs'], job_info['wall_time_secs'],
+                job_info['max_set_size_kbytes'])
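+    # Each row is a JSON array: date 'Y-M-D', HEAD SHA-1, job id, rosbag
+    # name, user time, system time, wall time (secs), and max RSS (kbytes).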
+    data = json.loads(data_string)
+    rows.append(data)
+
+  table.reload()
+  errors = table.insert_data(rows)
+  if not errors:
+    print 'Pushed {} row(s) into Cartographer:metrics'.format(len(rows))
+  else:
+    print 'Errors:'
+    pprint(errors)
+
+
+def parse_arguments():
+  """Parses the command line arguments."""
+  parser = argparse.ArgumentParser(
+      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+  parser.add_argument('--worker_id', type=int)
+  parser.add_argument('--num_workers', type=int)
+  parser.add_argument('--pipeline_id', type=str)
+  return parser.parse_args()
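+
+# Typical invocation inside the worker container (illustrative values):
+#   python /worker.py --worker_id=0 --num_workers=4 --pipeline_id=<id>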
+
+
+def main():
+  args = parse_arguments()
+  ds_client = datastore.Client()
+  job_selector = create_job_selector(int(args.worker_id), int(args.num_workers))
+  head_sha1 = get_head_git_sha1()
+  now = datetime.datetime.now()
+  pipeline_run_id = '{}-{}-{}_{}'.format(now.year, now.month, now.day,
+                                         head_sha1)
+  worker = Worker(ds_client, args.pipeline_id, pipeline_run_id)
+  stats_dict = worker.run_jobs(job_selector)
+  publish_stats_to_big_query(stats_dict, now, head_sha1)
+
+
+if __name__ == '__main__':
+  main()