SlideShare ist ein Scribd-Unternehmen logo
1 von 43
Benchy
Lightweight performing benchmark framework for
Python scripts
Marcel Caraciolo
@marcelcaraciolo
Developer, Scientist, contributor to the Crab recsys project,
has worked with Python for 6 years, interested in mobile,
education, machine learning and dataaaaa!
Recife, Brazil - http://aimotion.blogspot.com
About me
Co-founder of Crab - Python recsys library
Chief Scientist at Atepassar, an e-learning social network
Co-Founder and Instructor of PyCursos, teaching Python on-line
Co-Founder of Pingmind, on-line infrastructure for MOOCs
Interested in Python, mobile, e-learning and machine learning!
Why do we test ?
Freedom from fear
Testing for
performance
What made my
code slower ?
me
Solutions ?
In [1]: def f(x):
   ...:     return x*x
   ...:

In [2]: %timeit for x in range(100): f(x)
100000 loops, best of 3: 20.3 us per loop
Stop. Help is near
Performance benchmarks to compare several python code
alternatives
Generates graphs using matplotlib
Memory consumption, Performance timing available
https://github.com/python-recsys/benchy
Performance
benchmarks
Writing benchmarks
$ easy_install -U benchy
# pip install -U benchy
Writing benchmarks
from benchy.api import Benchmark

# No shared setup code is needed: every statement below is self-contained.
common_setup = ""

# Each benchmark gets a distinct name; the names match the ones that appear
# in the runner results shown later in the talk.

# List comprehension over range().
statement = "lst = ['i' for x in range(100000)]"
benchmark1 = Benchmark(statement, common_setup, name="list with range")

# Same comprehension over xrange().
statement = "lst = ['i' for x in xrange(100000)]"
benchmark2 = Benchmark(statement, common_setup, name="list with xrange")

# List built by sequence repetition.
statement = "lst = ['i'] * 100000"
benchmark3 = Benchmark(statement, common_setup, name='list with "*"')
Use them in your
workflow
[1]: print benchmark1.run()
{'memory': {'repeat': 3,
            'success': True,
            'units': 'MB',
            'usage': 2.97265625},
 'runtime': {'loops': 100,
             'repeat': 3,
             'success': True,
             'timing': 7.5653696060180664,
             'units': 'ms'}}
Same code as %timeit
and %memit
Beautiful reports
# Build the report text for benchmark1 from its results
# (reStructuredText, judging by the method name).
rst_text = benchmark1.to_rst(results)
Benchmark suite
from benchy.api import BenchmarkSuite

# Collect the three benchmarks in a single suite.
suite = BenchmarkSuite()
suite.append(benchmark1)
suite.append(benchmark2)
suite.append(benchmark3)
Run the benchmarks
from benchy.api import BenchmarkRunner

# The runner is given the suite, a temporary directory and a display name.
runner = BenchmarkRunner(benchmarks=suite, tmp_dir='.',
                         name='List Allocation Benchmark')
# run() returns a pair, unpacked here into a count and the results.
n_benchs, results = runner.run()
Which one is the fastest?
{Benchmark('list with "*"'):
    {'runtime': {'timing': 0.47582697868347168, 'repeat': 3, 'success': True, 'loops': 1000,
                 'timeBaselines': 1.0, 'units': 'ms'},
     'memory': {'usage': 0.3828125, 'units': 'MB', 'repeat': 3, 'success': True}},
 Benchmark('list with xrange'):
    {'runtime': {'timing': 5.623779296875, 'repeat': 3, 'success': True, 'loops': 100,
                 'timeBaselines': 11.818958463504936, 'units': 'ms'},
     'memory': {'usage': 0.71484375, 'units': 'MB', 'repeat': 3, 'success': True}},
 Benchmark('list with range'):
    {'runtime': {'timing': 6.5933513641357422, 'repeat': 3, 'success': True, 'loops': 100,
                 'timeBaselines': 13.856615239384636, 'units': 'ms'},
     'memory': {'usage': 2.2109375, 'units': 'MB', 'repeat': 3, 'success': True}}}
Plot relative
# Relative-timing plot (horizontal layout), saved as '<runner name>_r.png'.
fig = runner.plot_relative(results, horizontal=True)
plt.savefig('%s_r.png' % runner.name, bbox_inches='tight')
Plot absolute
# Absolute-timing plot (vertical layout), saved as '<runner name>.png'.
runner.plot_absolute(results, horizontal=False)
plt.savefig('%s.png' % runner.name) # bbox_inches='tight')
Full report
# Build the full report. The two image names must match the files written
# by plt.savefig(): '<name>.png' (absolute) and '<name>_r.png' (relative),
# hence the '.png' suffix including the dot.
rst_text = runner.to_rst(results, runner.name + '.png',
                         runner.name + '_r.png')
with open('teste.rst', 'w') as f:
    f.write(rst_text)
Full report
Full report
Why ?
Benchmark pairwise functions at Crab recsys library
http://aimotion.blogspot.com.br/2013/03/performing-runtime-benchmarks-with.html
Get involved
Create the benchmarks as TestCases
Check automatically for benchmark files and run like %nose.test()
More setup and teardown control
Group benchmarks in the same graph
Improvements
Added Database Handler
Added Git Support
Added New Runner
Run benchmarks
db.py
import	
  sqlite3
	
  
	
  
class BenchmarkDb(object):
    """
    Persistence handler for benchmark results.

    The methods in this excerpt rely on ``self._con`` (a database
    connection) and ``self._cursor`` (a cursor on it). Both are created
    outside the excerpt -- presumably sqlite3 objects, TODO confirm.
    """

    def _create_tables(self):
        """Drop the existing tables, re-create them and commit.

        All previously stored rows are discarded.
        """
        self._cursor.execute("drop table if exists benchmarksuites")
        self._cursor.execute("drop table if exists benchmarks")
        self._cursor.execute("drop table if exists results")
        # ... (elided in this excerpt; the benchmarksuites table is
        # presumably created here -- TODO confirm against the full db.py)

        self._cursor.execute(
            'CREATE TABLE '
            'benchmarks(checksum text PRIMARY KEY, '
            'name text, description text, suite_id integer, '
            'FOREIGN KEY(suite_id) REFERENCES benchmarksuites(id))')

        self._cursor.execute(
            'CREATE TABLE results(id integer '
            'PRIMARY KEY AUTOINCREMENT, checksum text, '
            'timestamp timestamp, ncalls text, timing float, traceback text, '
            'FOREIGN KEY(checksum) REFERENCES benchmarks(checksum))')

        self._con.commit()

    def write_benchmark(self, bm, suite=None):
        """Insert ``bm`` into the ``benchmarks`` table.

        Parameters
        ----------
        bm : object providing ``checksum``, ``name`` and ``description``.
        suite : optional object providing ``name``. When a ``benchmarksuites``
            row with that name exists, its id is stored as ``suite_id``;
            otherwise (or when ``suite`` is None) ``suite_id`` is NULL.

        The insert is not committed here.
        """
        suite_id = None
        if suite is not None:
            # Bind the name as a parameter instead of interpolating it into
            # the SQL text: a name containing quotes no longer breaks the
            # query, and a name equal to a column name is no longer read as
            # an identifier by SQLite's double-quote fallback.
            self._cursor.execute(
                'SELECT id FROM benchmarksuites where name = ?',
                (suite.name,))
            row = self._cursor.fetchone()
            if row is not None:
                suite_id = row[0]

        self._cursor.execute(
            'INSERT INTO benchmarks VALUES (?, ?, ?, ?)',
            (bm.checksum, bm.name, bm.description, suite_id))
Improvements
Added Database Handler
Added Git Support
Added New Runner
Run benchmarks
Git Repo
class GitRepository(Repository):
    """Read some basic statistics about a git repository."""

    def __init__(self, repo_path):
        """Store the repository location and load its commit log.

        ``repo_path`` is kept on the instance and handed to ``_git_command``;
        the four values returned by ``_parse_commit_log`` are stored as
        ``shas``, ``messages``, ``timestamps`` and ``authors``.
        """
        self.repo_path = repo_path
        self.git = _git_command(self.repo_path)
        commit_log = self._parse_commit_log()
        self.shas, self.messages, self.timestamps, self.authors = commit_log
[('d87fdf2', datetime.datetime(2013, 3, 22, 16, 55, 38)), ('a90a449', datetime.datetime(2013, 3, 22, 16, 54, 36)),
('fe66a86', datetime.datetime(2013, 3, 22, 16, 51, 2)), ('bea6b21', datetime.datetime(2013, 3, 22, 13, 14, 22)),
('bde5e63', datetime.datetime(2013, 3, 22, 5, 2, 56)), ('89634f6', datetime.datetime(2013, 3, 20, 4, 16, 19))]
Git Repo
class BenchmarkRepository(object):
    """
    Manage an isolated copy of a repository for benchmarking.

    External commands (``rm``, ``git``, ``cp``) are run from argument lists
    without a shell, so paths containing spaces or shell metacharacters are
    passed through unchanged. Exit statuses are not checked.
    """
    # ... (constructor and further members are elided in this excerpt)

    def _copy_repo(self):
        """Replace ``self.target_dir`` with a fresh clone and prepare it.

        An existing ``self.target_dir`` is deleted without confirmation.
        ``self.target_dir_tmp`` is then cloned into ``self.target_dir``,
        ``self._prep()`` is called and the benchmark scripts are copied in.
        """
        if os.path.exists(self.target_dir):
            print('Deleting %s first' % self.target_dir)
            # response = raw_input('%s exists, delete? y/n' % self.target_dir)
            # if response == 'n':
            #     raise Exception('foo')
            cmd = ['rm', '-rf', self.target_dir]
            print(' '.join(cmd))
            subprocess.call(cmd)

        self._clone(self.target_dir_tmp, self.target_dir)
        self._prep()
        self._copy_benchmark_scripts_and_deps()

    def _clone(self, source, target):
        """Run ``git clone source target``; the exit status is ignored."""
        cmd = ['git', 'clone', source, target]
        print(' '.join(cmd))
        subprocess.call(cmd)

    def _copy_benchmark_scripts_and_deps(self):
        """Copy ``run_benchmarks.py`` and the extra dependencies.

        ``run_benchmarks.py`` is looked up next to this module; the entries
        of ``self.dependencies`` (when not None) are copied as well. Each
        file is copied into ``self.target_dir`` with ``cp``; failures are
        reported by ``cp`` itself and do not raise.
        """
        pth, _ = os.path.split(os.path.abspath(__file__))
        deps = [os.path.join(pth, 'run_benchmarks.py')]
        if self.dependencies is not None:
            deps.extend(self.dependencies)

        for dep in deps:
            # NOTE(review): without a shell, glob patterns in a dependency
            # are no longer expanded -- confirm none are used.
            cmd = ['cp', dep, self.target_dir]
            print(' '.join(cmd))
            subprocess.call(cmd)
Improvements
Added Database Handler
Added Git Support
Added New Runner
Run benchmarks
New Runner
	
  
class BenchmarkGitRunner(BenchmarkRunner):
    # ... (remaining members are not shown in this excerpt)

    def _register_benchmarks(self):
        """Synchronise the runner's benchmarks with the database.

        A benchmark whose checksum is already among the stored ones only gets
        ``update_name``; any other benchmark is announced and written as new.
        """
        known_checksums = set(self.db.get_benchmarks().index)
        for bm in self.benchmarks:
            if bm.checksum not in known_checksums:
                print('Writing new benchmark %s, %s' % (bm.name, bm.checksum))
                self.db.write_benchmark(bm)
                continue
            self.db.update_name(bm)
	
  
New runner
	
  
class BenchmarkGitRunner(BenchmarkRunner):
    # ... (remaining members are not shown in this excerpt)

    def _run_revision(self, rev):
        """Run the benchmarks that are pending for ``rev`` in a child process.

        Returns ``(0, {})`` when nothing needs to run. Otherwise the
        repository copy is switched to ``rev``, the pending benchmarks are
        pickled into ``self.tmp_dir`` and ``run_benchmarks.py`` is started
        there. The excerpt ends once the child process has finished.
        """
        need_to_run = self._get_benchmarks_for_rev(rev)

        if not need_to_run:
            print('No benchmarks need running at %s' % rev)
            return 0, {}

        print('Running %d benchmarks for revision %s' % (len(need_to_run), rev))
        for bm in need_to_run:
            print(bm.name)

        self.bench_repo.switch_to_revision(rev)

        pickle_path = os.path.join(self.tmp_dir, 'benchmarks.pickle')
        results_path = os.path.join(self.tmp_dir, 'results.pickle')
        # Remove stale results so a leftover file is not taken for success.
        if os.path.exists(results_path):
            os.remove(results_path)
        # Pickles are binary data: open in 'wb' and close the file promptly.
        with open(pickle_path, 'wb') as pickle_file:
            pickle.dump(need_to_run, pickle_file)

        # run the process
        # The format string has three placeholders; the script directory was
        # missing from the arguments (TypeError at runtime).
        # NOTE(review): self.tmp_dir is assumed to hold run_benchmarks.py,
        # since the child also runs with cwd=self.tmp_dir -- confirm.
        cmd = 'python %s/run_benchmarks.py %s %s' % (self.tmp_dir,
                                                     pickle_path,
                                                     results_path)
        print(cmd)
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True,
                                cwd=self.tmp_dir)
        stdout, stderr = proc.communicate()
	
  
New runner	
  
class BenchmarkGitRunner(BenchmarkRunner):
    # ... (remaining members are not shown in this excerpt)

    def _run_revision(self, rev):
        """Run the benchmarks that are pending for ``rev`` in a child process.

        Returns a ``(count, results)`` pair: ``(0, {})`` when nothing needs
        to run, ``(len(need_to_run), {})`` when the child produced no results
        file, and otherwise the count together with the unpickled results.
        """
        need_to_run = self._get_benchmarks_for_rev(rev)

        if not need_to_run:
            print('No benchmarks need running at %s' % rev)
            return 0, {}

        print('Running %d benchmarks for revision %s' % (len(need_to_run), rev))
        for bm in need_to_run:
            print(bm.name)

        self.bench_repo.switch_to_revision(rev)

        # This excerpt used pickle_path/results_path without defining them;
        # the set-up below restores what the method needs in order to run.
        pickle_path = os.path.join(self.tmp_dir, 'benchmarks.pickle')
        results_path = os.path.join(self.tmp_dir, 'results.pickle')
        # Remove stale results so a leftover file is not taken for success.
        if os.path.exists(results_path):
            os.remove(results_path)
        # Pickles are binary data: open in 'wb' and close the file promptly.
        with open(pickle_path, 'wb') as pickle_file:
            pickle.dump(need_to_run, pickle_file)

        # run the process
        # The format string has three placeholders; the script directory was
        # missing from the arguments (TypeError at runtime).
        # NOTE(review): self.tmp_dir is assumed to hold run_benchmarks.py,
        # since the child also runs with cwd=self.tmp_dir -- confirm.
        cmd = 'python %s/run_benchmarks.py %s %s' % (self.tmp_dir,
                                                     pickle_path,
                                                     results_path)
        print(cmd)
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True,
                                cwd=self.tmp_dir)
        stdout, stderr = proc.communicate()

        if stderr:
            # These two error signatures trigger a hard clean of the
            # repository copy.
            if ("object has no attribute" in stderr or
                    'ImportError' in stderr):
                print(stderr)
                print('HARD CLEANING!')
                self.bench_repo.hard_clean()
            print(stderr)

        if not os.path.exists(results_path):
            print('Failed for revision %s' % rev)
            return len(need_to_run), {}

        # SECURITY NOTE: pickle.load executes code embedded in the file; the
        # results file must only ever come from our own child process.
        with open(results_path, 'rb') as results_file:
            results = pickle.load(results_file)

        # NOTE(review): the excerpt stops after loading the results; this
        # return mirrors the (count, results) shape of the other exits --
        # confirm against the full source.
        return len(need_to_run), results
Improvements
Added Database Handler
Added Git Support
Added New Runner
Run benchmarks
Running
from benchmark import Benchmark, BenchmarkRepository, BenchmarkGitRunner

# Read the locations from the configuration; fall back to defaults derived
# from this file's location when the configuration cannot be read.
try:
    REPO_PATH = config.get('setup', 'repo_path')
    REPO_URL = config.get('setup', 'repo_url')
    DB_PATH = config.get('setup', 'db_path')
    TMP_DIR = config.get('setup', 'tmp_dir')
except Exception:
    # 'except Exception' rather than a bare 'except': KeyboardInterrupt and
    # SystemExit must not be swallowed by the fallback.
    REPO_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__),
                                             "../"))
    REPO_URL = 'git@github.com:python-recsys/crab.git'
    DB_PATH = os.path.join(REPO_PATH, 'suite/benchmarks.db')
    TMP_DIR = os.path.join(HOME, 'tmp/base_benchy/')

# Shell snippets for cleaning and building the checked-out project.
PREPARE = """
python setup.py clean
"""
BUILD = """
python setup.py build_ext --inplace
"""

repo = BenchmarkRepository(REPO_PATH, REPO_URL, DB_PATH, TMP_DIR)
Running
	
  	
  	
  	
  common_setup	
  =	
  """
	
  	
  	
  	
  	
  import	
  numpy
	
  	
  	
  	
  	
  from	
  crab.metrics	
  import	
  cosine_distances
	
  	
  	
  	
  	
  X	
  =	
  numpy.random.uniform(1,5,(1000,))
	
  	
  	
  	
  """
	
  
	
  	
  	
  	
  bench	
  =	
  Benchmark(statement,	
  setup_bk1,	
  name="Crab	
  
Cosine")
	
  
	
  	
  	
  	
  suite	
  =	
  BenchmarkSuite()
	
  	
  	
  	
  suite.append(bench)	
  	
  	
  	
  	
  
	
  	
  	
  	
  statement	
  =	
  "cosine_distances(X,	
  X)"
	
  
	
  	
  	
  	
  runner	
  =	
  BenchmarkGitRunner(suite,	
  '.',	
  'Absolute	
  
timing	
  in	
  ms')
	
  	
  	
  	
  n_benchs,	
  results	
  =	
  runner.run()
	
  
	
  	
  	
  	
  runner.plot_history(results)
	
  	
  	
  	
  plt.show()
Improvements
Historical commits from version control now
benchmarked
Working now:
Module detection
# Benchmark instances found at module level: grouped per module name in
# by_module, and flattened into benchmarks.
by_module = {}
benchmarks = []
modules = ['metrics',
           'recommenders',
           'similarities']

for modname in modules:
    ref = __import__(modname)
    by_module[modname] = [obj for obj in vars(ref).values()
                          if isinstance(obj, Benchmark)]
    benchmarks += by_module[modname]

# Every collected benchmark is expected to have a name.
for bm in benchmarks:
    assert bm.name is not None
https://github.com/python-recsys/benchy
Forks and pull requests are welcome!
Benchy
Lightweight performing benchmark framework for
Python scripts
Marcel Caraciolo
@marcelcaraciolo
Developer, scientist, contributor to the Crab recsys project,
has worked with Python for 6 years, interested in mobile,
education, machine learning and dataaaaa!
Recife, Brazil - http://aimotion.blogspot.com

Weitere ähnliche Inhalte

Was ist angesagt?

Persisting Data on SQLite using Room
Persisting Data on SQLite using RoomPersisting Data on SQLite using Room
Persisting Data on SQLite using RoomNelson Glauber Leal
 
Building Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at StripeBuilding Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at StripeMongoDB
 
Building Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at StripeBuilding Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at StripeStripe
 
CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak
CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak   CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak
CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak PROIDEA
 
Python 내장 함수
Python 내장 함수Python 내장 함수
Python 내장 함수용 최
 
Presentation of the new OpenERP API. Raphael Collet, OpenERP
Presentation of the new OpenERP API. Raphael Collet, OpenERPPresentation of the new OpenERP API. Raphael Collet, OpenERP
Presentation of the new OpenERP API. Raphael Collet, OpenERPOdoo
 
Event Sourcing and Functional Programming
Event Sourcing and Functional ProgrammingEvent Sourcing and Functional Programming
Event Sourcing and Functional ProgrammingGlobalLogic Ukraine
 
Node 관계형 데이터베이스_바인딩
Node 관계형 데이터베이스_바인딩Node 관계형 데이터베이스_바인딩
Node 관계형 데이터베이스_바인딩HyeonSeok Choi
 
How to Bring Common UI Patterns to ADF
How to Bring Common UI Patterns to ADF How to Bring Common UI Patterns to ADF
How to Bring Common UI Patterns to ADF Luc Bors
 
Backbone.js: Run your Application Inside The Browser
Backbone.js: Run your Application Inside The BrowserBackbone.js: Run your Application Inside The Browser
Backbone.js: Run your Application Inside The BrowserHoward Lewis Ship
 
Formacion en movilidad: Conceptos de desarrollo en iOS (IV)
Formacion en movilidad: Conceptos de desarrollo en iOS (IV) Formacion en movilidad: Conceptos de desarrollo en iOS (IV)
Formacion en movilidad: Conceptos de desarrollo en iOS (IV) Mobivery
 
Dm adapter RubyConf.TW
Dm adapter RubyConf.TWDm adapter RubyConf.TW
Dm adapter RubyConf.TWcodingforrent
 
Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)
Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)
Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)MongoSF
 
Node js mongodriver
Node js mongodriverNode js mongodriver
Node js mongodriverchristkv
 
R (Shiny Package) - Server Side Code for Decision Support System
R (Shiny Package) - Server Side Code for Decision Support SystemR (Shiny Package) - Server Side Code for Decision Support System
R (Shiny Package) - Server Side Code for Decision Support SystemMaithreya Chakravarthula
 
R (Shiny Package) - UI Side Script for Decision Support System
R (Shiny Package) - UI Side Script for Decision Support SystemR (Shiny Package) - UI Side Script for Decision Support System
R (Shiny Package) - UI Side Script for Decision Support SystemMaithreya Chakravarthula
 

Was ist angesagt? (20)

Persisting Data on SQLite using Room
Persisting Data on SQLite using RoomPersisting Data on SQLite using Room
Persisting Data on SQLite using Room
 
Building Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at StripeBuilding Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at Stripe
 
Building Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at StripeBuilding Real Time Systems on MongoDB Using the Oplog at Stripe
Building Real Time Systems on MongoDB Using the Oplog at Stripe
 
CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak
CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak   CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak
CONFidence 2015: DTrace + OSX = Fun - Andrzej Dyjak
 
Python 내장 함수
Python 내장 함수Python 내장 함수
Python 내장 함수
 
Presentation of the new OpenERP API. Raphael Collet, OpenERP
Presentation of the new OpenERP API. Raphael Collet, OpenERPPresentation of the new OpenERP API. Raphael Collet, OpenERP
Presentation of the new OpenERP API. Raphael Collet, OpenERP
 
Event Sourcing and Functional Programming
Event Sourcing and Functional ProgrammingEvent Sourcing and Functional Programming
Event Sourcing and Functional Programming
 
Node 관계형 데이터베이스_바인딩
Node 관계형 데이터베이스_바인딩Node 관계형 데이터베이스_바인딩
Node 관계형 데이터베이스_바인딩
 
How to Bring Common UI Patterns to ADF
How to Bring Common UI Patterns to ADF How to Bring Common UI Patterns to ADF
How to Bring Common UI Patterns to ADF
 
Backbone.js: Run your Application Inside The Browser
Backbone.js: Run your Application Inside The BrowserBackbone.js: Run your Application Inside The Browser
Backbone.js: Run your Application Inside The Browser
 
Formacion en movilidad: Conceptos de desarrollo en iOS (IV)
Formacion en movilidad: Conceptos de desarrollo en iOS (IV) Formacion en movilidad: Conceptos de desarrollo en iOS (IV)
Formacion en movilidad: Conceptos de desarrollo en iOS (IV)
 
Javascript
JavascriptJavascript
Javascript
 
Dm adapter RubyConf.TW
Dm adapter RubyConf.TWDm adapter RubyConf.TW
Dm adapter RubyConf.TW
 
Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)
Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)
Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)
 
Node js mongodriver
Node js mongodriverNode js mongodriver
Node js mongodriver
 
I os 04
I os 04I os 04
I os 04
 
Dm adapter
Dm adapterDm adapter
Dm adapter
 
R (Shiny Package) - Server Side Code for Decision Support System
R (Shiny Package) - Server Side Code for Decision Support SystemR (Shiny Package) - Server Side Code for Decision Support System
R (Shiny Package) - Server Side Code for Decision Support System
 
R (Shiny Package) - UI Side Script for Decision Support System
R (Shiny Package) - UI Side Script for Decision Support SystemR (Shiny Package) - UI Side Script for Decision Support System
R (Shiny Package) - UI Side Script for Decision Support System
 
Zend framework service
Zend framework serviceZend framework service
Zend framework service
 

Andere mochten auch

Learning Pool Social Care Seminar
Learning Pool Social Care SeminarLearning Pool Social Care Seminar
Learning Pool Social Care SeminarPaul McElvaney
 
Migration Intro
Migration IntroMigration Intro
Migration Introdazza50
 
Continuous Deployment at Disqus (Pylons Minicon)
Continuous Deployment at Disqus (Pylons Minicon)Continuous Deployment at Disqus (Pylons Minicon)
Continuous Deployment at Disqus (Pylons Minicon)zeeg
 
Managing multigenerations in the Barbadian workspace[1]
Managing multigenerations in the Barbadian workspace[1]Managing multigenerations in the Barbadian workspace[1]
Managing multigenerations in the Barbadian workspace[1]Ian Walcott-Skinner
 
Learning Pool's Mark Lynch and Kevin Gallagher on 'Next Generation DLE'.
Learning Pool's Mark Lynch and Kevin Gallagher on 'Next Generation DLE'.Learning Pool's Mark Lynch and Kevin Gallagher on 'Next Generation DLE'.
Learning Pool's Mark Lynch and Kevin Gallagher on 'Next Generation DLE'.Paul McElvaney
 
Learning Pool: Yorkshire & Humber Learning Platform Adult Social Care: E-lear...
Learning Pool: Yorkshire & Humber Learning Platform Adult Social Care: E-lear...Learning Pool: Yorkshire & Humber Learning Platform Adult Social Care: E-lear...
Learning Pool: Yorkshire & Humber Learning Platform Adult Social Care: E-lear...Paul McElvaney
 
Describing exercise
Describing exerciseDescribing exercise
Describing exerciseSussan Roo
 
Ued案例
Ued案例Ued案例
Ued案例yamingd
 
Learning Pool and Carers
Learning Pool and Carers Learning Pool and Carers
Learning Pool and Carers Paul McElvaney
 
נוכחות אונליין - המכללה לעסקים קטנים, המכללה למנהל
נוכחות אונליין - המכללה לעסקים קטנים, המכללה למנהל    נוכחות אונליין - המכללה לעסקים קטנים, המכללה למנהל
נוכחות אונליין - המכללה לעסקים קטנים, המכללה למנהל Udi Salant
 
Hari
HariHari
Harislomb
 
Developing Policy for Emerging Technologies
Developing Policy for Emerging TechnologiesDeveloping Policy for Emerging Technologies
Developing Policy for Emerging TechnologiesLovisa Williams
 
Paper Based Student Enrollment - Disgrace to Education Technology
Paper Based Student Enrollment - Disgrace to Education TechnologyPaper Based Student Enrollment - Disgrace to Education Technology
Paper Based Student Enrollment - Disgrace to Education TechnologyJinal Jhaveri
 
Workshop Colin 2 Feb 2009
Workshop Colin 2 Feb 2009Workshop Colin 2 Feb 2009
Workshop Colin 2 Feb 2009Sjef Kerkhofs
 

Andere mochten auch (20)

Learning Pool Social Care Seminar
Learning Pool Social Care SeminarLearning Pool Social Care Seminar
Learning Pool Social Care Seminar
 
Migration Intro
Migration IntroMigration Intro
Migration Intro
 
Continuous Deployment at Disqus (Pylons Minicon)
Continuous Deployment at Disqus (Pylons Minicon)Continuous Deployment at Disqus (Pylons Minicon)
Continuous Deployment at Disqus (Pylons Minicon)
 
Lecture 22
Lecture 22Lecture 22
Lecture 22
 
Managing multigenerations in the Barbadian workspace[1]
Managing multigenerations in the Barbadian workspace[1]Managing multigenerations in the Barbadian workspace[1]
Managing multigenerations in the Barbadian workspace[1]
 
Learning Pool's Mark Lynch and Kevin Gallagher on 'Next Generation DLE'.
Learning Pool's Mark Lynch and Kevin Gallagher on 'Next Generation DLE'.Learning Pool's Mark Lynch and Kevin Gallagher on 'Next Generation DLE'.
Learning Pool's Mark Lynch and Kevin Gallagher on 'Next Generation DLE'.
 
Learning Pool: Yorkshire & Humber Learning Platform Adult Social Care: E-lear...
Learning Pool: Yorkshire & Humber Learning Platform Adult Social Care: E-lear...Learning Pool: Yorkshire & Humber Learning Platform Adult Social Care: E-lear...
Learning Pool: Yorkshire & Humber Learning Platform Adult Social Care: E-lear...
 
Describing exercise
Describing exerciseDescribing exercise
Describing exercise
 
Ued案例
Ued案例Ued案例
Ued案例
 
Lecture 10
Lecture 10Lecture 10
Lecture 10
 
I gala premios peridotita
I gala premios peridotitaI gala premios peridotita
I gala premios peridotita
 
Learning Pool and Carers
Learning Pool and Carers Learning Pool and Carers
Learning Pool and Carers
 
נוכחות אונליין - המכללה לעסקים קטנים, המכללה למנהל
נוכחות אונליין - המכללה לעסקים קטנים, המכללה למנהל    נוכחות אונליין - המכללה לעסקים קטנים, המכללה למנהל
נוכחות אונליין - המכללה לעסקים קטנים, המכללה למנהל
 
Hari
HariHari
Hari
 
Developing Policy for Emerging Technologies
Developing Policy for Emerging TechnologiesDeveloping Policy for Emerging Technologies
Developing Policy for Emerging Technologies
 
Lecture 23
Lecture 23Lecture 23
Lecture 23
 
Divosa v1.3
Divosa v1.3Divosa v1.3
Divosa v1.3
 
Paper Based Student Enrollment - Disgrace to Education Technology
Paper Based Student Enrollment - Disgrace to Education TechnologyPaper Based Student Enrollment - Disgrace to Education Technology
Paper Based Student Enrollment - Disgrace to Education Technology
 
Workshop Colin 2 Feb 2009
Workshop Colin 2 Feb 2009Workshop Colin 2 Feb 2009
Workshop Colin 2 Feb 2009
 
Tema ii
Tema iiTema ii
Tema ii
 

Ähnlich wie Benchy, python framework for performance benchmarking of Python Scripts

Benchy: Lightweight framework for Performance Benchmarks
Benchy: Lightweight framework for Performance Benchmarks Benchy: Lightweight framework for Performance Benchmarks
Benchy: Lightweight framework for Performance Benchmarks Marcel Caraciolo
 
Viktor Tsykunov: Azure Machine Learning Service
Viktor Tsykunov: Azure Machine Learning ServiceViktor Tsykunov: Azure Machine Learning Service
Viktor Tsykunov: Azure Machine Learning ServiceLviv Startup Club
 
Designing REST API automation tests in Kotlin
Designing REST API automation tests in KotlinDesigning REST API automation tests in Kotlin
Designing REST API automation tests in KotlinDmitriy Sobko
 
Fun Teaching MongoDB New Tricks
Fun Teaching MongoDB New TricksFun Teaching MongoDB New Tricks
Fun Teaching MongoDB New TricksMongoDB
 
Protractor framework – how to make stable e2e tests for Angular applications
Protractor framework – how to make stable e2e tests for Angular applicationsProtractor framework – how to make stable e2e tests for Angular applications
Protractor framework – how to make stable e2e tests for Angular applicationsLudmila Nesvitiy
 
Programming with Python and PostgreSQL
Programming with Python and PostgreSQLProgramming with Python and PostgreSQL
Programming with Python and PostgreSQLPeter Eisentraut
 
Data visualization in python/Django
Data visualization in python/DjangoData visualization in python/Django
Data visualization in python/Djangokenluck2001
 
Inside PyMongo - MongoNYC
Inside PyMongo - MongoNYCInside PyMongo - MongoNYC
Inside PyMongo - MongoNYCMike Dirolf
 
Design Summit - Rails 4 Migration - Aaron Patterson
Design Summit - Rails 4 Migration - Aaron PattersonDesign Summit - Rails 4 Migration - Aaron Patterson
Design Summit - Rails 4 Migration - Aaron PattersonManageIQ
 
Azure machine learning service
Azure machine learning serviceAzure machine learning service
Azure machine learning serviceRuth Yakubu
 
MongoDB hearts Django? (Django NYC)
MongoDB hearts Django? (Django NYC)MongoDB hearts Django? (Django NYC)
MongoDB hearts Django? (Django NYC)Mike Dirolf
 
Art & music vs Google App Engine
Art & music vs Google App EngineArt & music vs Google App Engine
Art & music vs Google App Enginethomas alisi
 
Unsupervised Aspect Based Sentiment Analysis at Scale
Unsupervised Aspect Based Sentiment Analysis at ScaleUnsupervised Aspect Based Sentiment Analysis at Scale
Unsupervised Aspect Based Sentiment Analysis at ScaleAaron (Ari) Bornstein
 
Python Development (MongoSF)
Python Development (MongoSF)Python Development (MongoSF)
Python Development (MongoSF)Mike Dirolf
 
The Ring programming language version 1.6 book - Part 46 of 189
The Ring programming language version 1.6 book - Part 46 of 189The Ring programming language version 1.6 book - Part 46 of 189
The Ring programming language version 1.6 book - Part 46 of 189Mahmoud Samir Fayed
 
Nyc open-data-2015-andvanced-sklearn-expanded
Nyc open-data-2015-andvanced-sklearn-expandedNyc open-data-2015-andvanced-sklearn-expanded
Nyc open-data-2015-andvanced-sklearn-expandedVivian S. Zhang
 
pytest로 파이썬 코드 테스트하기
pytest로 파이썬 코드 테스트하기pytest로 파이썬 코드 테스트하기
pytest로 파이썬 코드 테스트하기Yeongseon Choe
 

Ähnlich wie Benchy, python framework for performance benchmarking of Python Scripts (20)

Benchy: Lightweight framework for Performance Benchmarks
Benchy: Lightweight framework for Performance Benchmarks Benchy: Lightweight framework for Performance Benchmarks
Benchy: Lightweight framework for Performance Benchmarks
 
Viktor Tsykunov: Azure Machine Learning Service
Viktor Tsykunov: Azure Machine Learning ServiceViktor Tsykunov: Azure Machine Learning Service
Viktor Tsykunov: Azure Machine Learning Service
 
Designing REST API automation tests in Kotlin
Designing REST API automation tests in KotlinDesigning REST API automation tests in Kotlin
Designing REST API automation tests in Kotlin
 
Fun Teaching MongoDB New Tricks
Fun Teaching MongoDB New TricksFun Teaching MongoDB New Tricks
Fun Teaching MongoDB New Tricks
 
Protractor framework – how to make stable e2e tests for Angular applications
Protractor framework – how to make stable e2e tests for Angular applicationsProtractor framework – how to make stable e2e tests for Angular applications
Protractor framework – how to make stable e2e tests for Angular applications
 
Programming with Python and PostgreSQL
Programming with Python and PostgreSQLProgramming with Python and PostgreSQL
Programming with Python and PostgreSQL
 
Data visualization in python/Django
Data visualization in python/DjangoData visualization in python/Django
Data visualization in python/Django
 
Inside PyMongo - MongoNYC
Inside PyMongo - MongoNYCInside PyMongo - MongoNYC
Inside PyMongo - MongoNYC
 
Design Summit - Rails 4 Migration - Aaron Patterson
Design Summit - Rails 4 Migration - Aaron PattersonDesign Summit - Rails 4 Migration - Aaron Patterson
Design Summit - Rails 4 Migration - Aaron Patterson
 
What's new in Django 1.2?
What's new in Django 1.2?What's new in Django 1.2?
What's new in Django 1.2?
 
Azure machine learning service
Azure machine learning serviceAzure machine learning service
Azure machine learning service
 
Deep Learning for Computer Vision: Software Frameworks (UPC 2016)
Deep Learning for Computer Vision: Software Frameworks (UPC 2016)Deep Learning for Computer Vision: Software Frameworks (UPC 2016)
Deep Learning for Computer Vision: Software Frameworks (UPC 2016)
 
MongoDB hearts Django? (Django NYC)
MongoDB hearts Django? (Django NYC)MongoDB hearts Django? (Django NYC)
MongoDB hearts Django? (Django NYC)
 
Art & music vs Google App Engine
Art & music vs Google App EngineArt & music vs Google App Engine
Art & music vs Google App Engine
 
Unsupervised Aspect Based Sentiment Analysis at Scale
Unsupervised Aspect Based Sentiment Analysis at ScaleUnsupervised Aspect Based Sentiment Analysis at Scale
Unsupervised Aspect Based Sentiment Analysis at Scale
 
Python Development (MongoSF)
Python Development (MongoSF)Python Development (MongoSF)
Python Development (MongoSF)
 
The Ring programming language version 1.6 book - Part 46 of 189
The Ring programming language version 1.6 book - Part 46 of 189The Ring programming language version 1.6 book - Part 46 of 189
The Ring programming language version 1.6 book - Part 46 of 189
 
Nyc open-data-2015-andvanced-sklearn-expanded
Nyc open-data-2015-andvanced-sklearn-expandedNyc open-data-2015-andvanced-sklearn-expanded
Nyc open-data-2015-andvanced-sklearn-expanded
 
Django Pro ORM
Django Pro ORMDjango Pro ORM
Django Pro ORM
 
pytest로 파이썬 코드 테스트하기
pytest로 파이썬 코드 테스트하기pytest로 파이썬 코드 테스트하기
pytest로 파이썬 코드 테스트하기
 

Mehr von Marcel Caraciolo

Como interpretar seu próprio genoma com Python
Como interpretar seu próprio genoma com PythonComo interpretar seu próprio genoma com Python
Como interpretar seu próprio genoma com PythonMarcel Caraciolo
 
Joblib: Lightweight pipelining for parallel jobs (v2)
Joblib:  Lightweight pipelining for parallel jobs (v2)Joblib:  Lightweight pipelining for parallel jobs (v2)
Joblib: Lightweight pipelining for parallel jobs (v2)Marcel Caraciolo
 
Construindo softwares de bioinformática para análises clínicas : Desafios e...
Construindo softwares  de bioinformática  para análises clínicas : Desafios e...Construindo softwares  de bioinformática  para análises clínicas : Desafios e...
Construindo softwares de bioinformática para análises clínicas : Desafios e...Marcel Caraciolo
 
Como Python ajudou a automatizar o nosso laboratório v.2
Como Python ajudou a automatizar o nosso laboratório v.2Como Python ajudou a automatizar o nosso laboratório v.2
Como Python ajudou a automatizar o nosso laboratório v.2Marcel Caraciolo
 
Como Python pode ajudar na automação do seu laboratório
Como Python pode ajudar na automação do  seu laboratórioComo Python pode ajudar na automação do  seu laboratório
Como Python pode ajudar na automação do seu laboratórioMarcel Caraciolo
 
Python on Science ? Yes, We can.
Python on Science ?   Yes, We can.Python on Science ?   Yes, We can.
Python on Science ? Yes, We can.Marcel Caraciolo
 
Oficina Python: Hackeando a Web com Python 3
Oficina Python: Hackeando a Web com Python 3Oficina Python: Hackeando a Web com Python 3
Oficina Python: Hackeando a Web com Python 3Marcel Caraciolo
 
Recommender Systems with Ruby (adding machine learning, statistics, etc)
Recommender Systems with Ruby (adding machine learning, statistics, etc)Recommender Systems with Ruby (adding machine learning, statistics, etc)
Recommender Systems with Ruby (adding machine learning, statistics, etc)Marcel Caraciolo
 
Opensource - Como começar e dá dinheiro ?
Opensource - Como começar e dá dinheiro ?Opensource - Como começar e dá dinheiro ?
Opensource - Como começar e dá dinheiro ?Marcel Caraciolo
 
Python e 10 motivos por que devo conhece-la ?
Python e 10 motivos por que devo conhece-la ?Python e 10 motivos por que devo conhece-la ?
Python e 10 motivos por que devo conhece-la ?Marcel Caraciolo
 
GeoMapper, Python Script for Visualizing Data on Social Networks with Geo-loc...
GeoMapper, Python Script for Visualizing Data on Social Networks with Geo-loc...GeoMapper, Python Script for Visualizing Data on Social Networks with Geo-loc...
GeoMapper, Python Script for Visualizing Data on Social Networks with Geo-loc...Marcel Caraciolo
 
Construindo Sistemas de Recomendação com Python
Construindo Sistemas de Recomendação com PythonConstruindo Sistemas de Recomendação com Python
Construindo Sistemas de Recomendação com PythonMarcel Caraciolo
 
Python, A pílula Azul da programação
Python, A pílula Azul da programaçãoPython, A pílula Azul da programação
Python, A pílula Azul da programaçãoMarcel Caraciolo
 
Construindo Soluções Científicas com Big Data & MapReduce
Construindo Soluções Científicas com Big Data & MapReduceConstruindo Soluções Científicas com Big Data & MapReduce
Construindo Soluções Científicas com Big Data & MapReduceMarcel Caraciolo
 
Como Python está mudando a forma de aprendizagem à distância no Brasil
Como Python está mudando a forma de aprendizagem à distância no BrasilComo Python está mudando a forma de aprendizagem à distância no Brasil
Como Python está mudando a forma de aprendizagem à distância no BrasilMarcel Caraciolo
 
Novas Tendências para a Educação a Distância: Como reinventar a educação ?
Novas Tendências para a Educação a Distância: Como reinventar a educação ?Novas Tendências para a Educação a Distância: Como reinventar a educação ?
Novas Tendências para a Educação a Distância: Como reinventar a educação ?Marcel Caraciolo
 
Aula WebCrawlers com Regex - PyCursos
Aula WebCrawlers com Regex - PyCursosAula WebCrawlers com Regex - PyCursos
Aula WebCrawlers com Regex - PyCursosMarcel Caraciolo
 
Arquivos Zip com Python - Aula PyCursos
Arquivos Zip com Python - Aula PyCursosArquivos Zip com Python - Aula PyCursos
Arquivos Zip com Python - Aula PyCursosMarcel Caraciolo
 
PyFoursquare: Python Library for Foursquare
PyFoursquare: Python Library for FoursquarePyFoursquare: Python Library for Foursquare
PyFoursquare: Python Library for FoursquareMarcel Caraciolo
 

Mehr von Marcel Caraciolo (20)

Como interpretar seu próprio genoma com Python
Como interpretar seu próprio genoma com PythonComo interpretar seu próprio genoma com Python
Como interpretar seu próprio genoma com Python
 
Joblib: Lightweight pipelining for parallel jobs (v2)
Joblib:  Lightweight pipelining for parallel jobs (v2)Joblib:  Lightweight pipelining for parallel jobs (v2)
Joblib: Lightweight pipelining for parallel jobs (v2)
 
Construindo softwares de bioinformática para análises clínicas : Desafios e...
Construindo softwares  de bioinformática  para análises clínicas : Desafios e...Construindo softwares  de bioinformática  para análises clínicas : Desafios e...
Construindo softwares de bioinformática para análises clínicas : Desafios e...
 
Como Python ajudou a automatizar o nosso laboratório v.2
Como Python ajudou a automatizar o nosso laboratório v.2Como Python ajudou a automatizar o nosso laboratório v.2
Como Python ajudou a automatizar o nosso laboratório v.2
 
Como Python pode ajudar na automação do seu laboratório
Como Python pode ajudar na automação do  seu laboratórioComo Python pode ajudar na automação do  seu laboratório
Como Python pode ajudar na automação do seu laboratório
 
Python on Science ? Yes, We can.
Python on Science ?   Yes, We can.Python on Science ?   Yes, We can.
Python on Science ? Yes, We can.
 
Oficina Python: Hackeando a Web com Python 3
Oficina Python: Hackeando a Web com Python 3Oficina Python: Hackeando a Web com Python 3
Oficina Python: Hackeando a Web com Python 3
 
Recommender Systems with Ruby (adding machine learning, statistics, etc)
Recommender Systems with Ruby (adding machine learning, statistics, etc)Recommender Systems with Ruby (adding machine learning, statistics, etc)
Recommender Systems with Ruby (adding machine learning, statistics, etc)
 
Opensource - Como começar e dá dinheiro ?
Opensource - Como começar e dá dinheiro ?Opensource - Como começar e dá dinheiro ?
Opensource - Como começar e dá dinheiro ?
 
Big Data com Python
Big Data com PythonBig Data com Python
Big Data com Python
 
Python e 10 motivos por que devo conhece-la ?
Python e 10 motivos por que devo conhece-la ?Python e 10 motivos por que devo conhece-la ?
Python e 10 motivos por que devo conhece-la ?
 
GeoMapper, Python Script for Visualizing Data on Social Networks with Geo-loc...
GeoMapper, Python Script for Visualizing Data on Social Networks with Geo-loc...GeoMapper, Python Script for Visualizing Data on Social Networks with Geo-loc...
GeoMapper, Python Script for Visualizing Data on Social Networks with Geo-loc...
 
Construindo Sistemas de Recomendação com Python
Construindo Sistemas de Recomendação com PythonConstruindo Sistemas de Recomendação com Python
Construindo Sistemas de Recomendação com Python
 
Python, A pílula Azul da programação
Python, A pílula Azul da programaçãoPython, A pílula Azul da programação
Python, A pílula Azul da programação
 
Construindo Soluções Científicas com Big Data & MapReduce
Construindo Soluções Científicas com Big Data & MapReduceConstruindo Soluções Científicas com Big Data & MapReduce
Construindo Soluções Científicas com Big Data & MapReduce
 
Como Python está mudando a forma de aprendizagem à distância no Brasil
Como Python está mudando a forma de aprendizagem à distância no BrasilComo Python está mudando a forma de aprendizagem à distância no Brasil
Como Python está mudando a forma de aprendizagem à distância no Brasil
 
Novas Tendências para a Educação a Distância: Como reinventar a educação ?
Novas Tendências para a Educação a Distância: Como reinventar a educação ?Novas Tendências para a Educação a Distância: Como reinventar a educação ?
Novas Tendências para a Educação a Distância: Como reinventar a educação ?
 
Aula WebCrawlers com Regex - PyCursos
Aula WebCrawlers com Regex - PyCursosAula WebCrawlers com Regex - PyCursos
Aula WebCrawlers com Regex - PyCursos
 
Arquivos Zip com Python - Aula PyCursos
Arquivos Zip com Python - Aula PyCursosArquivos Zip com Python - Aula PyCursos
Arquivos Zip com Python - Aula PyCursos
 
PyFoursquare: Python Library for Foursquare
PyFoursquare: Python Library for FoursquarePyFoursquare: Python Library for Foursquare
PyFoursquare: Python Library for Foursquare
 

Kürzlich hochgeladen

IAC 2024 - IA Fast Track to Search Focused AI Solutions
IAC 2024 - IA Fast Track to Search Focused AI SolutionsIAC 2024 - IA Fast Track to Search Focused AI Solutions
IAC 2024 - IA Fast Track to Search Focused AI SolutionsEnterprise Knowledge
 
The Codex of Business Writing Software for Real-World Solutions 2.pptx
The Codex of Business Writing Software for Real-World Solutions 2.pptxThe Codex of Business Writing Software for Real-World Solutions 2.pptx
The Codex of Business Writing Software for Real-World Solutions 2.pptxMalak Abu Hammad
 
[2024]Digital Global Overview Report 2024 Meltwater.pdf
[2024]Digital Global Overview Report 2024 Meltwater.pdf[2024]Digital Global Overview Report 2024 Meltwater.pdf
[2024]Digital Global Overview Report 2024 Meltwater.pdfhans926745
 
04-2024-HHUG-Sales-and-Marketing-Alignment.pptx
04-2024-HHUG-Sales-and-Marketing-Alignment.pptx04-2024-HHUG-Sales-and-Marketing-Alignment.pptx
04-2024-HHUG-Sales-and-Marketing-Alignment.pptxHampshireHUG
 
Handwritten Text Recognition for manuscripts and early printed texts
Handwritten Text Recognition for manuscripts and early printed textsHandwritten Text Recognition for manuscripts and early printed texts
Handwritten Text Recognition for manuscripts and early printed textsMaria Levchenko
 
The Role of Taxonomy and Ontology in Semantic Layers - Heather Hedden.pdf
The Role of Taxonomy and Ontology in Semantic Layers - Heather Hedden.pdfThe Role of Taxonomy and Ontology in Semantic Layers - Heather Hedden.pdf
The Role of Taxonomy and Ontology in Semantic Layers - Heather Hedden.pdfEnterprise Knowledge
 
Boost PC performance: How more available memory can improve productivity
Boost PC performance: How more available memory can improve productivityBoost PC performance: How more available memory can improve productivity
Boost PC performance: How more available memory can improve productivityPrincipled Technologies
 
Slack Application Development 101 Slides
Slack Application Development 101 SlidesSlack Application Development 101 Slides
Slack Application Development 101 Slidespraypatel2
 
GenCyber Cyber Security Day Presentation
GenCyber Cyber Security Day PresentationGenCyber Cyber Security Day Presentation
GenCyber Cyber Security Day PresentationMichael W. Hawkins
 
CNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of ServiceCNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of Servicegiselly40
 
Presentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreterPresentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreternaman860154
 
Histor y of HAM Radio presentation slide
Histor y of HAM Radio presentation slideHistor y of HAM Radio presentation slide
Histor y of HAM Radio presentation slidevu2urc
 
How to convert PDF to text with Nanonets
How to convert PDF to text with NanonetsHow to convert PDF to text with Nanonets
How to convert PDF to text with Nanonetsnaman860154
 
Data Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt RobisonData Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt RobisonAnna Loughnan Colquhoun
 
Partners Life - Insurer Innovation Award 2024
Partners Life - Insurer Innovation Award 2024Partners Life - Insurer Innovation Award 2024
Partners Life - Insurer Innovation Award 2024The Digital Insurer
 
08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking Men08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking MenDelhi Call girls
 
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...Miguel Araújo
 
TrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law DevelopmentsTrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law DevelopmentsTrustArc
 
08448380779 Call Girls In Greater Kailash - I Women Seeking Men
08448380779 Call Girls In Greater Kailash - I Women Seeking Men08448380779 Call Girls In Greater Kailash - I Women Seeking Men
08448380779 Call Girls In Greater Kailash - I Women Seeking MenDelhi Call girls
 
Exploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone ProcessorsExploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone Processorsdebabhi2
 

Kürzlich hochgeladen (20)

IAC 2024 - IA Fast Track to Search Focused AI Solutions
IAC 2024 - IA Fast Track to Search Focused AI SolutionsIAC 2024 - IA Fast Track to Search Focused AI Solutions
IAC 2024 - IA Fast Track to Search Focused AI Solutions
 
The Codex of Business Writing Software for Real-World Solutions 2.pptx
The Codex of Business Writing Software for Real-World Solutions 2.pptxThe Codex of Business Writing Software for Real-World Solutions 2.pptx
The Codex of Business Writing Software for Real-World Solutions 2.pptx
 
[2024]Digital Global Overview Report 2024 Meltwater.pdf
[2024]Digital Global Overview Report 2024 Meltwater.pdf[2024]Digital Global Overview Report 2024 Meltwater.pdf
[2024]Digital Global Overview Report 2024 Meltwater.pdf
 
04-2024-HHUG-Sales-and-Marketing-Alignment.pptx
04-2024-HHUG-Sales-and-Marketing-Alignment.pptx04-2024-HHUG-Sales-and-Marketing-Alignment.pptx
04-2024-HHUG-Sales-and-Marketing-Alignment.pptx
 
Handwritten Text Recognition for manuscripts and early printed texts
Handwritten Text Recognition for manuscripts and early printed textsHandwritten Text Recognition for manuscripts and early printed texts
Handwritten Text Recognition for manuscripts and early printed texts
 
The Role of Taxonomy and Ontology in Semantic Layers - Heather Hedden.pdf
The Role of Taxonomy and Ontology in Semantic Layers - Heather Hedden.pdfThe Role of Taxonomy and Ontology in Semantic Layers - Heather Hedden.pdf
The Role of Taxonomy and Ontology in Semantic Layers - Heather Hedden.pdf
 
Boost PC performance: How more available memory can improve productivity
Boost PC performance: How more available memory can improve productivityBoost PC performance: How more available memory can improve productivity
Boost PC performance: How more available memory can improve productivity
 
Slack Application Development 101 Slides
Slack Application Development 101 SlidesSlack Application Development 101 Slides
Slack Application Development 101 Slides
 
GenCyber Cyber Security Day Presentation
GenCyber Cyber Security Day PresentationGenCyber Cyber Security Day Presentation
GenCyber Cyber Security Day Presentation
 
CNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of ServiceCNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of Service
 
Presentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreterPresentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreter
 
History of HAM Radio presentation slide
History of HAM Radio presentation slideHistory of HAM Radio presentation slide
History of HAM Radio presentation slide
 
How to convert PDF to text with Nanonets
How to convert PDF to text with NanonetsHow to convert PDF to text with Nanonets
How to convert PDF to text with Nanonets
 
Data Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt RobisonData Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt Robison
 
Partners Life - Insurer Innovation Award 2024
Partners Life - Insurer Innovation Award 2024Partners Life - Insurer Innovation Award 2024
Partners Life - Insurer Innovation Award 2024
 
08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking Men08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking Men
 
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
 
TrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law DevelopmentsTrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
TrustArc Webinar - Stay Ahead of US State Data Privacy Law Developments
 
08448380779 Call Girls In Greater Kailash - I Women Seeking Men
08448380779 Call Girls In Greater Kailash - I Women Seeking Men08448380779 Call Girls In Greater Kailash - I Women Seeking Men
08448380779 Call Girls In Greater Kailash - I Women Seeking Men
 
Exploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone ProcessorsExploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone Processors
 

Benchy, a Python framework for performance benchmarking of Python scripts

  • 1. Benchy Lightweight performing benchmark framework for Python scripts Marcel Caraciolo @marcelcaraciolo Developer, Scientist, contributor to the Crab recsys project, has worked with Python for 6 years, interested in mobile, education, machine learning and dataaaaa! Recife, Brazil - http://aimotion.blogspot.com
  • 2. About me Co-founder of Crab - Python recsys library Chief Scientist at Atepassar, e-learning social network Co-Founder and Instructor of PyCursos, teaching Python on-line Co-Founder of Pingmind, on-line infrastructure for MOOCs Interested in Python, mobile, e-learning and machine learning!
  • 3. Why do we test ?
  • 6. What made my code slower ?
  • 7.
  • 8. me
  • 9. Solutions ? In  [1]:  def  f(x):      ...:          return  x*x      ...:   In  [2]:  %timeit  for  x  in  range (100):  f(x) 100000  loops,  best  of  3:  20.3  us   per  loop
  • 10. Stop. Help is near Performance benchmarks to compare several python code alternatives Generates graphs using matplotlib Memory consumption, Performance timing available https://github.com/python-recsys/benchy
  • 12. Writing benchmarks $ easy_install -U benchy # pip install -U benchy
  • 13. Writing benchmarks from  benchy.api  import  Benchmark common_setup  =  "" statement  =  "lst  =  ['i'  for  x  in  range(100000)]" benchmark1  =  Benchmark(statement,  common_setup,  name=  "range") statement  =  "lst  =  ['i'  for  x  in  xrange(100000)]" benchmark2  =  Benchmark(statement,  common_setup,  name=  "xrange") statement  =  "lst  =  ['i']  *  100000" benchmark3  =  Benchmark(statement,  common_setup,  name=  "range")
  • 14. Use them in your workflow [1]:  print  benchmark1.run() {'memory':  {'repeat':  3,                        'success':  True,                        'units':  'MB',                        'usage':  2.97265625},  'runtime':  {'loops':  100,                          'repeat':  3,                          'success':  True,                          'timing':  7.5653696060180664,                          'units':  'ms'}} Same code as %timeit and %memit
  • 15. Beautiful reports rst_text  =  benchmark1.to_rst(results)
  • 16. Benchmark suite from  benchy.api  import  BenchmarkSuite suite  =  BenchmarkSuite() suite.append(benchmark1) suite.append(benchmark2) suite.append(benchmark3)
  • 17. Run the benchmarks from  benchy.api  import  BenchmarkRunner runner  =  BenchmarkRunner(benchmarks=suite,  tmp_dir='.',                                                            name=  'List  Allocation  Benchmark') n_benchs,  results  =  runner.run()
  • 18. Who is the faster ? {Benchmark('list  with  "*"'):        {'runtime':  {'timing':  0.47582697868347168,  'repeat':  3,  'success':  True,  'loops':  1000,   'timeBaselines':  1.0,  'units':  'ms'},        'memory':  {'usage':  0.3828125,  'units':  'MB',  'repeat':  3,  'success':  True}}, Benchmark('list  with  xrange'):        {'runtime':  {'timing':  5.623779296875,  'repeat':  3,  'success':  True,  'loops':  100,   'timeBaselines':  11.818958463504936,  'units':  'ms'},        'memory':  {'usage':  0.71484375,  'units':  'MB',  'repeat':  3,  'success':  True}}, Benchmark('list  with  range'):  {        'runtime':  {'timing':  6.5933513641357422,  'repeat':  3,  'success':  True,  'loops':  100,   'timeBaselines':  13.856615239384636,  'units':  'ms'},        'memory':  {'usage':  2.2109375,  'units':  'MB',  'repeat':  3,  'success':  True}}}
  • 19. Plot relative fig  =  runner.plot_relative(results,  horizontal=True) plt.savefig('%s_r.png'  %  runner.name,  bbox_inches='tight')
  • 21. Full report rst_text  =  runner.to_rst(results,  runner.name  +  'png',                runner.name  +  '_r.png') with  open('teste.rst',  'w')  as  f:                f.write(rst_text)
  • 24. Why? To benchmark the pairwise functions in the Crab recsys library http://aimotion.blogspot.com.br/2013/03/performing-runtime-benchmarks-with.html
  • 25. Get involved Create the benchmarks as TestCases Check automatically for benchmark files and run them like %nose.test() More setup and teardown control Group benchmarks on the same graph
  • 26. Improvements Added Database Handler Added Git Support Added New Runner Run benchmarks
  • 27. db.py import  sqlite3     class  BenchmarkDb(object):        """        Persistence  handler  for  bechmark  results        """        def  _create_tables(self):                self._cursor.execute("drop  table  if  exists  benchmarksuites")                self._cursor.execute("drop  table  if  exists  benchmarks")                self._cursor.execute("drop  table  if  exists  results")                ...                    self._cursor.execute('CREATE  TABLE                              benchmarks(checksum  text  PRIMARY  KEY,                          name  text,  description  text,  suite_id  integer,                            FOREIGN  KEY(suite_id)  REFERENCES  benchmarksuites(id))')                  self._cursor.execute('CREATE  TABLE  results(id  integer                          PRIMARY  KEY  AUTOINCREMENT,  checksum  text,                          timestamp  timestamp,  ncalls  text,  timing  float,  traceback  text,                          FOREIGN  KEY(checksum)  REFERENCES  benchmarks(checksum))')                  self._con.commit()          def  write_benchmark(self,  bm,  suite=None):                if  suite  is  not  None:                        self._cursor.execute('SELECT  id  FROM  benchmarksuites                                  where  name  =  "%s"'  %  suite.name)                        row  =  self._cursor.fetchone()                else:                        row  =  None                  if  row  ==  None:                        self._cursor.execute('INSERT  INTO  benchmarks  VALUES  (?,  ?,  ?,  ?)',                                (bm.checksum,  bm.name,  bm.description,  None))                else:                        self._cursor.execute('INSERT  INTO  benchmarks  VALUES  (?,  ?,  ?,  ?)',                                (bm.checksum,  bm.name,  bm.description,  row[0]))
  • 28. Improvements Added Database Handler Added Git Support Added New Runner Run benchmarks
  • 29. Git Repo class  GitRepository(Repository):        """        Read  some  basic  statistics  about  a  git  repository        """          def  __init__(self,  repo_path):                self.repo_path  =  repo_path                self.git  =  _git_command(self.repo_path)                (self.shas,  self.messages,                  self.timestamps,  self.authors)  =  self._parse_commit_log() [('d87fdf2', datetime.datetime(2013, 3, 22, 16, 55, 38)), ('a90a449', datetime.datetime(2013, 3, 22, 16, 54, 36)), ('fe66a86', datetime.datetime(2013, 3, 22, 16, 51, 2)), ('bea6b21', datetime.datetime(2013, 3, 22, 13, 14, 22)), ('bde5e63', datetime.datetime(2013, 3, 22, 5, 2, 56)), ('89634f6', datetime.datetime(2013, 3, 20, 4, 16, 19))]
  • 30. Git Repo class  BenchmarkRepository(object):        """        Manage  an  isolated  copy  of  a  repository  for  benchmarking        """        ...          def  _copy_repo(self):                if  os.path.exists(self.target_dir):                        print  'Deleting  %s  first'  %  self.target_dir                        #  response  =  raw_input('%s  exists,  delete?  y/n'  %  self.target_dir)                        #  if  response  ==  'n':                        #          raise  Exception('foo')                        cmd  =  'rm  -­‐rf  %s'  %  self.target_dir                        print  cmd                        os.system(cmd)                  self._clone(self.target_dir_tmp,  self.target_dir)                self._prep()                self._copy_benchmark_scripts_and_deps()          def  _clone(self,  source,  target):                cmd  =  'git  clone  %s  %s'  %  (source,  target)                print  cmd                os.system(cmd)          def  _copy_benchmark_scripts_and_deps(self):                pth,  _  =  os.path.split(os.path.abspath(__file__))                deps  =  [os.path.join(pth,  'run_benchmarks.py')]                if  self.dependencies  is  not  None:                        deps.extend(self.dependencies)                  for  dep  in  deps:                        cmd  =  'cp  %s  %s'  %  (dep,  self.target_dir)                        print  cmd                        proc  =  subprocess.Popen(cmd,  shell=True)                        proc.wait()
  • 31. Improvements Added Database Handler Added Git Support Added New Runner Run benchmarks
  • 32. New Runner   class  BenchmarkGitRunner(BenchmarkRunner):    ...              def  _register_benchmarks(self):                ex_benchmarks  =  self.db.get_benchmarks()                db_checksums  =  set(ex_benchmarks.index)                for  bm  in  self.benchmarks:                        if  bm.checksum  in  db_checksums:                                self.db.update_name(bm)                        else:                                print  'Writing  new  benchmark  %s,  %s'  %  (bm.name,                                                                                              bm.checksum)                                self.db.write_benchmark(bm)  
  • 33. New runner   class  BenchmarkGitRunner(BenchmarkRunner):    ...              def  _run_revision(self,  rev):                need_to_run  =  self._get_benchmarks_for_rev(rev)                  if  not  need_to_run:                        print  'No  benchmarks  need  running  at  %s'  %  rev                        return  0,  {}                  print  'Running  %d  benchmarks  for  revision  %s'  %  (len(need_to_run),  rev)                for  bm  in  need_to_run:                        print  bm.name                  self.bench_repo.switch_to_revision(rev)                  pickle_path  =  os.path.join(self.tmp_dir,  'benchmarks.pickle')                results_path  =  os.path.join(self.tmp_dir,  'results.pickle')                if  os.path.exists(results_path):                        os.remove(results_path)                pickle.dump(need_to_run,  open(pickle_path,  'w'))                  #  run  the  process                cmd  =  'python  %s/run_benchmarks.py  %s  %s'  %  (pickle_path,  results_path)                print  cmd                proc  =  subprocess.Popen(cmd,  stdout=subprocess.PIPE,                                                                stderr=subprocess.PIPE,                                                                shell=True,                                                                cwd=self.tmp_dir)                stdout,  stderr  =  proc.communicate()  
  • 34. New runner   class  BenchmarkGitRunner(BenchmarkRunner):    ...              def  _run_revision(self,  rev):                need_to_run  =  self._get_benchmarks_for_rev(rev)                  if  not  need_to_run:                        print  'No  benchmarks  need  running  at  %s'  %  rev                        return  0,  {}                  print  'Running  %d  benchmarks  for  revision  %s'  %  (len(need_to_run),  rev)                for  bm  in  need_to_run:                        print  bm.name                  self.bench_repo.switch_to_revision(rev)                #  run  the  process                cmd  =  'python  %s/run_benchmarks.py  %s  %s'  %  (pickle_path,  results_path)                print  cmd                proc  =  subprocess.Popen(cmd,  stdout=subprocess.PIPE,                                                                stderr=subprocess.PIPE,                                                                shell=True,                                                                cwd=self.tmp_dir)                stdout,  stderr  =  proc.communicate()                      if  stderr:                        if  ("object  has  no  attribute"  in  stderr  or                                'ImportError'  in  stderr):                                print  stderr                                print  'HARD  CLEANING!'                                self.bench_repo.hard_clean()                        print  stderr                    if  not  os.path.exists(results_path):                        print  'Failed  for  revision  %s'  %  rev                        return  len(need_to_run),  {}                results  =  pickle.load(open(results_path,  'r'))
  • 35. Improvements Added Database Handler Added Git Support Added New Runner Run benchmarks
  • 36. Running from  benchmark  import  Benchmark,  BenchmarkRepository,  BenchmarkGitRunner try:        REPO_PATH  =  config.get('setup',  'repo_path')        REPO_URL  =  config.get('setup',  'repo_url')        DB_PATH  =  config.get('setup',  'db_path')        TMP_DIR  =  config.get('setup',  'tmp_dir') except:        REPO_PATH  =  os.path.abspath(os.path.join(os.path.dirname(__file__),   "../"))        REPO_URL  =  'git@github.com:python-­‐recsys/crab.git'        DB_PATH  =  os.path.join(REPO_PATH,  'suite/benchmarks.db')        TMP_DIR  =  os.path.join(HOME,  'tmp/base_benchy/') PREPARE  =  """ python  setup.py  clean """ BUILD  =  """ python  setup.py  build_ext  -­‐-­‐inplace """ repo  =  BenchmarkRepository(REPO_PATH,  REPO_URL,  DB_PATH,  TMP_DIR)
  • 37. Running        common_setup  =  """          import  numpy          from  crab.metrics  import  cosine_distances          X  =  numpy.random.uniform(1,5,(1000,))        """          bench  =  Benchmark(statement,  setup_bk1,  name="Crab   Cosine")          suite  =  BenchmarkSuite()        suite.append(bench)                  statement  =  "cosine_distances(X,  X)"          runner  =  BenchmarkGitRunner(suite,  '.',  'Absolute   timing  in  ms')        n_benchs,  results  =  runner.run()          runner.plot_history(results)        plt.show()
  • 38. Improvements Historical commits from version control now benchmarked
  • 39. Working now: Module detection by_module  =  {} benchmarks  =  [] modules  =  ['metrics',                      'recommenders',                      'similarities'] for  modname  in  modules:        ref  =  __import__(modname)        by_module[modname]  =  [v  for  v  in  ref.__dict__.values()                                                    if  isinstance(v,  Benchmark)]        benchmarks.extend(by_module[modname]) for  bm  in  benchmarks:        assert(bm.name  is  not  None)
  • 41.
  • 42.
  • 43. Benchy Lightweight performing benchmark framework for Python scripts Marcel Caraciolo @marcelcaraciolo Developer, Scientist, contributor to the Crab recsys project, has worked with Python for 6 years, interested in mobile, education, machine learning and dataaaaa! Recife, Brazil - http://aimotion.blogspot.com