Optimization with Timeit

Code optimization analysis using Python's timeit module

06.19.2015

I needed to find the fastest way to skip to a certain point in a large CSV file. Python has an awesome module that makes this kind of comparison really easy. It’s called timeit.

I’ve demonstrated one simple way of using timeit to compare three different functions below.





import timeit
import os
import re
import mmap
import contextlib

path = './big.csv'

# Option 1
def option1():
    """Jump to a known byte offset and read everything after it.

    Opens the file named by the module-level ``path`` in binary mode, seeks
    to the byte offset equal to the current size of ``./big2.csv`` and reads
    from there to the end of the file. The data read is discarded; the
    function returns ``None``.
    """
    with open(path, 'rb') as csv_file:
        # Per the original note, './big2.csv' is the file as it was before
        # more data was added, so its size is used as the resume offset.
        csv_file.seek(os.path.getsize('./big2.csv'))
        # Result intentionally unused: only the cost of the read matters.
        csv_file.read()

# Option 2
def option2():
    """Find the marker by loading the whole file as a string, then re-read.

    Reads the entire file named by the module-level ``path``, looks up the
    first occurrence of ``'LAST'`` in that text, seeks the file to that
    index, skips the remainder of that line and reads to the end of the
    file. The data read is discarded; the function returns ``None``.

    Raises:
        ValueError: from ``str.index`` when ``'LAST'`` does not occur.
    """
    with open(path, 'r') as csv_file:
        whole_text = csv_file.read()
        # NOTE(review): option3 searches for 'LAST_DATE' rather than 'LAST';
        # confirm both are meant to land on the same line.
        marker_pos = whole_text.index('LAST')
        # NOTE(review): a string index is used as a file position here; this
        # presumably relies on one byte per character -- confirm for the data.
        csv_file.seek(marker_pos)
        csv_file.readline()  # Skip the rest of the line holding the marker.
        csv_file.read()  # Result intentionally unused.

# Option 3
def option3():
    """Memory-map the file, regex-search for the marker and read what follows.

    Maps the whole file named by the module-level ``path`` read-only,
    searches the mapping for ``LAST_DATE``, moves to the start of the match,
    skips the remainder of that line and reads to the end of the mapping.
    The data read is discarded; the function returns ``None``.

    Raises:
        ValueError: if ``LAST_DATE`` does not occur in the file (the same
            exception type option2's ``str.index`` raises for a missing
            marker).
    """
    # Binary read-only mode is enough because the mapping below is read-only.
    with open(path, 'rb') as f:
        # The second positional argument of mmap.mmap is the *length* of the
        # mapping, not an open flag: 0 maps the whole file. Read-only access
        # is requested explicitly through ``access``.
        mapping = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
        with contextlib.closing(mapping) as mf:
            # A bytes pattern is required to search a bytes-like mmap on
            # Python 3 and is equivalent to the str pattern on Python 2.
            m = re.search(b'LAST_DATE', mf)
            if m is None:
                raise ValueError('LAST_DATE not found in %s' % path)
            mf.seek(m.start())
            mf.readline()  # Consume the old line.
            # Read everything that remains, like options 1 and 2 do, instead
            # of an arbitrary ``m.end()`` bytes. The explicit length keeps
            # this working where mmap.read() requires an argument.
            mf.read(len(mf) - mf.tell())

# Time 5000 calls of each option and print the total elapsed seconds.
# The labels must be ordinary string literals: the backtick-delimited
# originals were a syntax error. The '%s %s' form prints the label, a space
# and the timing, and is valid on both Python 2 and Python 3.
for label, func in (
    ('Option 1: ', option1),
    ('Option 2: ', option2),
    ('Option 3: ', option3),
):
    print('%s %s' % (label, timeit.timeit(func, number=5000)))