Chapter 8: Python Code for Specifying output file names with formatter() and regex()

See also

Example Code for suffix()

from ruffus import *

#---------------------------------------------------------------
#   create initial files
#
@originate([
    ['job1.a.start', 'job1.b.start'],
    ['job2.a.start', 'job2.b.start'],
    ['job3.a.start', 'job3.b.start'],
])
def create_initial_file_pairs(output_files):
    """Create every file named in *output_files* as an empty file.

    *output_files* is an iterable of file names; existing files are
    truncated because they are opened in "w" mode.
    """
    for path in output_files:
        # Opening for writing and closing straight away leaves an empty file.
        open(path, "w").close()

#---------------------------------------------------------------
#
#   suffix
#
@transform(create_initial_file_pairs,             # name of previous task(s) (or list of files, or a glob)
            suffix(".start"),                     # matching suffix of the "input file"
            [".output.a.1", 45, ".output.b.1"])   # resulting suffix
def first_task(input_files, output_parameters):
    """Print the input files and output parameters this task is called with.

    Nothing is written to disk here; the task only displays its arguments
    so the effect of suffix() on the output parameters can be inspected.
    """
    # A single pre-formatted argument keeps the printed text identical on
    # Python 2 and Python 3 (the old print statement is a SyntaxError on 3).
    print("  input_parameters  =  %s" % (input_files,))
    print("  output_parameters =  %s" % (output_parameters,))


#
#       Run
#
#   first_task is passed as the target task.
#   NOTE(review): presumably ruffus runs the upstream
#   create_initial_file_pairs task first -- confirm.
pipeline_run([first_task])

Example Code for formatter()

from ruffus import *

#   create initial files
@originate([
    ['job1.a.start', 'job1.b.start'],
    ['job2.a.start', 'job2.b.start'],
    # NOTE(review): this pair ends in ".c.start", presumably on purpose so
    # that the "b"-only pattern used by first_task rejects it -- confirm.
    ['job3.a.start', 'job3.c.start'],
])
def create_initial_file_pairs(output_files):
    """Create every file named in *output_files* as an empty file.

    *output_files* is an iterable of file names; existing files are
    truncated because they are opened in "w" mode.
    """
    for path in output_files:
        # Opening for writing and closing straight away leaves an empty file.
        open(path, "w").close()


#---------------------------------------------------------------
#
#   formatter
#

#   first task
@transform(create_initial_file_pairs,                               # Input

            # Raw strings keep regex escapes such as \d away from Python's own
            # string-escape processing; the pattern text itself is unchanged.
            formatter(r".+/job(?P<JOBNUMBER>\d+).a.start",          # Extract job number
                      r".+/job[123].b.start"),                      # Match only "b" files

            ["{path[0]}/jobs{JOBNUMBER[0]}.output.a.1",             # Replacement list
             "{path[1]}/jobs{JOBNUMBER[0]}.output.b.1", 45])
def first_task(input_files, output_parameters):
    """Print the input files and output parameters this task is called with.

    Nothing is written to disk here; the task only displays its arguments
    so the effect of formatter() on the output parameters can be inspected.
    """
    # A single pre-formatted argument keeps the printed text identical on
    # Python 2 and Python 3 (the old print statement is a SyntaxError on 3).
    print("input_parameters =  %s" % (input_files,))
    print("output_parameters =  %s" % (output_parameters,))


#
#       Run
#
#   No target task is named here.
#   NOTE(review): verbose=0 presumably suppresses ruffus's own progress
#   messages so only the output printed by first_task is shown -- confirm.
pipeline_run(verbose=0)

Example Code for formatter() with replacements in extra arguments

from ruffus import *

#   create initial files
@originate([
    ['job1.a.start', 'job1.b.start'],
    ['job2.a.start', 'job2.b.start'],
    # NOTE(review): this pair ends in ".c.start", presumably on purpose so
    # that the "b"-only pattern used by first_task rejects it -- confirm.
    ['job3.a.start', 'job3.c.start'],
])
def create_initial_file_pairs(output_files):
    """Create every file named in *output_files* as an empty file.

    *output_files* is an iterable of file names; existing files are
    truncated because they are opened in "w" mode.
    """
    for path in output_files:
        # Opening for writing and closing straight away leaves an empty file.
        open(path, "w").close()


#---------------------------------------------------------------
#
#   print job number as an extra argument
#

#   first task
@transform(create_initial_file_pairs,                               # Input

            # Raw strings keep regex escapes such as \d away from Python's own
            # string-escape processing; the pattern text itself is unchanged.
            formatter(r".+/job(?P<JOBNUMBER>\d+).a.start",          # Extract job number
                      r".+/job[123].b.start"),                      # Match only "b" files

            ["{path[0]}/jobs{JOBNUMBER[0]}.output.a.1",             # Replacement list
             "{path[1]}/jobs{JOBNUMBER[0]}.output.b.1"],

            "{JOBNUMBER[0]}")                                       # Extra argument: job number
def first_task(input_files, output_parameters, job_number):
    """Print the job number followed by the input files of this job.

    *job_number* is the extra decorator argument built from the
    "{JOBNUMBER[0]}" replacement string; *output_parameters* is accepted
    but not used.
    """
    # A single pre-formatted argument keeps the printed text identical on
    # Python 2 and Python 3 (the old print statement is a SyntaxError on 3).
    print("%s : %s" % (job_number, input_files))


#   Run the pipeline; no target task is named here.
#   NOTE(review): verbose=0 presumably suppresses ruffus's own progress
#   messages so only the output printed by first_task is shown -- confirm.
pipeline_run(verbose=0)

Example Code for formatter() in Zoos

from ruffus import *

#   Make directories
@mkdir(["tiger", "lion", "dog", "crocodile", "rose"])
@originate([
    #   List of animals and plants
    "tiger/mammals.wild.animals",
    "lion/mammals.wild.animals",
    "lion/mammals.handreared.animals",
    "dog/mammals.tame.animals",
    "dog/mammals.wild.animals",
    "crocodile/reptiles.wild.animals",
    "rose/flowering.handreared.plants",
])
def create_initial_files(output_file):
    """Create *output_file* as an empty file (truncating it if it exists)."""
    # Opening for writing and closing straight away leaves an empty file.
    open(output_file, "w").close()


#   Put different animals in different directories depending on their clade
@transform(create_initial_files,                                       # Input

           # Raw string keeps \w away from Python's own string-escape
           # processing; the pattern text itself is unchanged.
           formatter(r".+/(?P<clade>\w+).(?P<tame>\w+).animals"),      # Only animals: ignore plants!

           "{subpath[0][1]}/{clade[0]}/{tame[0]}.{subdir[0][0]}.food", # Replacement

           "{subpath[0][1]}/{clade[0]}",                               # new_directory
           "{subdir[0][0]}",                                           # animal_name
           "{tame[0]}")                                                # tameness
def feed(input_file, output_file, new_directory, animal_name, tameness):
    """Print where the food file for one animal will be placed.

    *output_file*, *new_directory*, *animal_name* and *tameness* are the
    strings produced from the formatter() replacement patterns above;
    *input_file* is accepted but not used.
    """
    # Explicit keyword arguments instead of **locals(), and a single
    # pre-formatted argument so the text is identical on Python 2 and 3.
    print("Food for the {tameness:11s} {animal_name:9s} = {output_file:90s} will be placed in {new_directory}".format(
        tameness=tameness,
        animal_name=animal_name,
        output_file=output_file,
        new_directory=new_directory))


#   Run the pipeline; no target task is named here.
#   NOTE(review): verbose=0 presumably suppresses ruffus's own progress
#   messages so only the lines printed by feed are shown -- confirm.
pipeline_run(verbose=0)


Results in:

::

    >>> pipeline_run(verbose=0)
    Food for the wild        crocodile = ./reptiles/wild.crocodile.food will be placed in ./reptiles
    Food for the tame        dog       = ./mammals/tame.dog.food        will be placed in ./mammals
    Food for the wild        dog       = ./mammals/wild.dog.food        will be placed in ./mammals
    Food for the handreared  lion      = ./mammals/handreared.lion.food will be placed in ./mammals
    Food for the wild        lion      = ./mammals/wild.lion.food       will be placed in ./mammals
    Food for the wild        tiger     = ./mammals/wild.tiger.food      will be placed in ./mammals

Example Code for regex() in Zoos

from ruffus import *

#   Make directories
@mkdir(["tiger", "lion", "dog", "crocodile", "rose"])
@originate([
    #   List of animals and plants
    "tiger/mammals.wild.animals",
    "lion/mammals.wild.animals",
    "lion/mammals.handreared.animals",
    "dog/mammals.tame.animals",
    "dog/mammals.wild.animals",
    "crocodile/reptiles.wild.animals",
    "rose/flowering.handreared.plants",
])
def create_initial_files(output_file):
    """Create *output_file* as an empty file (truncating it if it exists)."""
    # Opening for writing and closing straight away leaves an empty file.
    open(output_file, "w").close()



#   Put different animals in different directories depending on their clade
@transform(create_initial_files,                                        # Input

           regex(r"(.*?/?)(\w+)/(?P<clade>\w+).(?P<tame>\w+).animals"), # Only animals: ignore plants!

           r"\1/\g<clade>/\g<tame>.\2.food",                            # Replacement

           r"\1/\g<clade>",                                             # new_directory
           r"\2",                                                       # animal_name
           r"\g<tame>")                                                 # tameness (raw, like its siblings)
def feed(input_file, output_file, new_directory, animal_name, tameness):
    """Print where the food file for one animal will be placed.

    *output_file*, *new_directory*, *animal_name* and *tameness* are the
    strings produced from the regex() replacement patterns above;
    *input_file* is accepted but not used.
    """
    # Explicit keyword arguments instead of **locals(), and a single
    # pre-formatted argument so the text is identical on Python 2 and 3.
    print("Food for the {tameness:11s} {animal_name:9s} = {output_file:90s} will be placed in {new_directory}".format(
        tameness=tameness,
        animal_name=animal_name,
        output_file=output_file,
        new_directory=new_directory))


#   Run the pipeline; no target task is named here.
#   NOTE(review): verbose=0 presumably suppresses ruffus's own progress
#   messages so only the lines printed by feed are shown -- confirm.
pipeline_run(verbose=0)


Results in:

::

    >>> pipeline_run(verbose=0)
    Food for the wild        crocodile = reptiles/wild.crocodile.food will be placed in reptiles
    Food for the tame        dog       = mammals/tame.dog.food        will be placed in mammals
    Food for the wild        dog       = mammals/wild.dog.food        will be placed in mammals
    Food for the handreared  lion      = mammals/handreared.lion.food will be placed in mammals
    Food for the wild        lion      = mammals/wild.lion.food       will be placed in mammals
    Food for the wild        tiger     = mammals/wild.tiger.food      will be placed in mammals