/*
mrHeuristic: A Map-Reduce implementation of the FHM workflow discovery algorithm
Copyright (C) 2013-2014 Joerg Evermann

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
*/

package mrHeuristic;

import java.io.IOException;
import java.net.URI;
import java.sql.Timestamp;
import java.util.*;
import java.util.Map.Entry;

import mrHeuristic.WfRType;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;


public class heuristic {

	/**
	 * Stage 1 mapper: turns one tab-separated log line of the form
	 * {@code caseId <TAB> event <TAB> timestamp} into a
	 * ({@link CaseID}, {@link EventTimestampPair}) record.
	 *
	 * Lines that do not yield exactly three tab-separated tokens are dropped.
	 * The timestamp token is parsed with {@link Timestamp#valueOf(String)},
	 * which throws {@link IllegalArgumentException} for a malformed value.
	 */
	public static class Map1 extends MapReduceBase implements
			Mapper<LongWritable, Text, CaseID, EventTimestampPair> {

		// Output objects are allocated once and refilled on every call.
		private EventTimestampPair eventStamp = new EventTimestampPair();
		private CaseID caseKey = new CaseID();

		/**
		 * Parses a single input line and emits it keyed by its case id.
		 *
		 * @param key      input key supplied by the input format (not used)
		 * @param value    one line of the event log
		 * @param output   collector receiving the (case id, event/timestamp) pair
		 * @param reporter progress reporter (not used)
		 * @throws IOException if the collector fails
		 */
		public void map(LongWritable key, Text value,
				OutputCollector<CaseID, EventTimestampPair> output,
				Reporter reporter) throws IOException {
			StringTokenizer fields = new StringTokenizer(value.toString(), "\t");

			// Anything other than exactly three fields is not a log record.
			if (fields.countTokens() != 3) {
				return;
			}

			String caseToken = fields.nextToken();
			String eventToken = fields.nextToken();
			String timeToken = fields.nextToken();

			caseKey.set(new Text(caseToken));
			eventStamp.set(eventToken, Timestamp.valueOf(timeToken));
			output.collect(caseKey, eventStamp);
		}
	}

	/**
	 * Stage 1 reducer: receives all events of one case, orders them with a
	 * {@link TreeMap} keyed by {@link EventTimestampPair}, and counts the
	 * workflow relations that occur in the resulting trace.
	 *
	 * For every position {@code i} of the trace it records
	 * <ul>
	 * <li>a COUNT relation of the event with itself,</li>
	 * <li>a DIRECT and a SUCCESSOR relation with the event at {@code i + 1},</li>
	 * <li>a LOOPTWO relation when the event at {@code i + 2} equals the event at
	 * {@code i} but differs from the one at {@code i + 1},</li>
	 * <li>a SUCCESSOR relation with every event from {@code i + 2} onwards.</li>
	 * </ul>
	 * The per-case counts are emitted keyed by the event pair.
	 */
	public static class Reduce1 extends MapReduceBase implements
			Reducer<CaseID, EventTimestampPair, EventPair, WorkflowRelationExt> {

		// Per-case tally, keyed by the string form of the relation; rebuilt on
		// every reduce() call.
		Hashtable<String, WorkflowRelationExt> map;

		/**
		 * Builds the ordered trace of one case and emits its relation counts.
		 *
		 * @param key      the case id (not used beyond grouping)
		 * @param values   the events of the case; each one is copied before it is stored
		 * @param output   collector receiving (event pair, counted relation) records
		 * @param reporter progress reporter (not used)
		 * @throws IOException if the collector fails
		 */
		public void reduce(CaseID key, Iterator<EventTimestampPair> values,
				OutputCollector<EventPair, WorkflowRelationExt> output,
				Reporter reporter) throws IOException {

			// Copy every value into a sorted map so the events come out ordered.
			SortedMap<EventTimestampPair, String> ordered = new TreeMap<EventTimestampPair, String>();
			while (values.hasNext()) {
				EventTimestampPair copy = new EventTimestampPair(values.next());
				ordered.put(copy, copy.getEvent());
			}

			map = new Hashtable<String, WorkflowRelationExt>();

			String[] trace = ordered.values().toArray(new String[0]);
			final int length = trace.length;

			for (int pos = 0; pos < length; pos++) {
				final String current = trace[pos];
				addToOutput(current, current, WfRType.COUNT);

				// The last event of the trace has no followers.
				if (pos + 1 >= length) {
					continue;
				}

				final String next = trace[pos + 1];
				addToOutput(current, next, WfRType.DIRECT);
				addToOutput(current, next, WfRType.SUCCESSOR);

				// Pattern "a b a" with a != b: a length-two loop between a and b.
				if (pos + 2 < length && trace[pos + 2].equals(current)
						&& !trace[pos + 2].equals(next)) {
					addToOutput(current, next, WfRType.LOOPTWO);
				}

				// Every later event is an (indirect) successor of the current one.
				for (int later = pos + 2; later < length; later++) {
					addToOutput(current, trace[later], WfRType.SUCCESSOR);
				}
			}

			for (WorkflowRelationExt counted : map.values()) {
				output.collect(
						new EventPair(counted.getEvent1(), counted.getEvent2()),
						counted);
			}
		}

		/**
		 * Adds one occurrence of the relation {@code ev1 -> ev2} to the tally.
		 * The two events are stored in lexicographic order; the boolean passed
		 * to the relation records whether that order matches the observed one.
		 *
		 * @param ev1  the earlier event
		 * @param ev2  the later event
		 * @param type the kind of relation observed
		 */
		private void addToOutput(String ev1, String ev2, WfRType type) throws IOException {
			final boolean inOrder = ev1.compareTo(ev2) <= 0;
			final WorkflowRelation relation = inOrder
					? new WorkflowRelation(ev1, ev2, type, true)
					: new WorkflowRelation(ev2, ev1, type, false);

			final String tallyKey = relation.toString();
			final WorkflowRelationExt known = map.get(tallyKey);
			if (known != null) {
				known.incrementCount();
				return;
			}
			map.put(tallyKey, new WorkflowRelationExt(relation, 1));
		}
	}

	/**
	 * Stage 2 combiner: pre-aggregates the relation counts of one event pair.
	 *
	 * The incoming counts are summed per relation type and direction flag, and
	 * exactly seven records are written back for the pair (DIRECT, LOOPTWO,
	 * SUCCESSOR and COUNT with the flag set; DIRECT, LOOPTWO and SUCCESSOR with
	 * the flag cleared), including those whose sum is zero.
	 */
	public static class Combine2 extends MapReduceBase
			implements
			Reducer<EventPair, WorkflowRelationExt, EventPair, WorkflowRelationExt> {

		/**
		 * Sums the counts for one event pair and re-emits them.
		 *
		 * @param key      the event pair
		 * @param values   counted relations recorded for that pair
		 * @param output   collector receiving the aggregated relations
		 * @param reporter progress reporter (not used)
		 * @throws IOException if the collector fails
		 */
		public void reduce(EventPair key, Iterator<WorkflowRelationExt> values,
				OutputCollector<EventPair, WorkflowRelationExt> output,
				Reporter reporter) throws IOException {

			int forwardDirect = 0;
			int forwardLoopTwo = 0;
			int forwardSuccessor = 0;
			int occurrences = 0;
			int backwardDirect = 0;
			int backwardLoopTwo = 0;
			int backwardSuccessor = 0;

			while (values.hasNext()) {
				WorkflowRelationExt relation = values.next();
				boolean forward = relation.getDir();

				if (relation.getType() == WfRType.DIRECT) {
					if (forward) {
						forwardDirect += relation.getCount();
					} else {
						backwardDirect += relation.getCount();
					}
				} else if (relation.getType() == WfRType.LOOPTWO) {
					if (forward) {
						forwardLoopTwo += relation.getCount();
					} else {
						backwardLoopTwo += relation.getCount();
					}
				} else if (relation.getType() == WfRType.SUCCESSOR) {
					if (forward) {
						forwardSuccessor += relation.getCount();
					} else {
						backwardSuccessor += relation.getCount();
					}
				} else if (forward && relation.getType() == WfRType.COUNT) {
					// COUNT is only tallied when the direction flag is set.
					occurrences += relation.getCount();
				}
			}

			output.collect(key, aggregate(key, WfRType.DIRECT, true, forwardDirect));
			output.collect(key, aggregate(key, WfRType.LOOPTWO, true, forwardLoopTwo));
			output.collect(key, aggregate(key, WfRType.SUCCESSOR, true, forwardSuccessor));
			output.collect(key, aggregate(key, WfRType.COUNT, true, occurrences));

			output.collect(key, aggregate(key, WfRType.DIRECT, false, backwardDirect));
			output.collect(key, aggregate(key, WfRType.LOOPTWO, false, backwardLoopTwo));
			output.collect(key, aggregate(key, WfRType.SUCCESSOR, false, backwardSuccessor));
		}

		/** Builds the aggregated relation record for the events of {@code key}. */
		private static WorkflowRelationExt aggregate(EventPair key, WfRType type,
				boolean forward, int total) {
			return new WorkflowRelationExt(key.getEvent1(), key.getEvent2(),
					type, forward, total);
		}
	}

	/**
	 * Stage 2 reducer: turns the summed relation counts of one event pair into
	 * dependency measures. Every record is written under the constant key 0.
	 *
	 * With {@code d1}/{@code d2} the DIRECT counts and {@code l1}/{@code l2}
	 * the LOOPTWO counts for the flag set/cleared, it emits
	 * <ul>
	 * <li>for two different events: {@code (d1 - d2) / (d1 + d2 + 1)} in the
	 * stored order and {@code (d2 - d1) / (d1 + d2 + 1)} in the reverse
	 * order, each only when positive;</li>
	 * <li>for an event paired with itself: {@code d1 / (d1 + 1)} when
	 * {@code d1 > 0};</li>
	 * <li>{@code (l1 + l2) / (l1 + l2 + 1)} as LOOPTWO in both orders when
	 * positive;</li>
	 * <li>the raw SUCCESSOR counts per direction and the raw COUNT, each only
	 * when positive.</li>
	 * </ul>
	 */
	public static class Reduce2 extends MapReduceBase
			implements
			Reducer<EventPair, WorkflowRelationExt, IntWritable, DependencyRelation> {

		/**
		 * Aggregates the counts for one event pair and emits the derived measures.
		 *
		 * @param key      the event pair
		 * @param values   counted relations recorded for that pair
		 * @param output   collector receiving the dependency relations
		 * @param reporter progress reporter (not used)
		 * @throws IOException if the collector fails
		 */
		public void reduce(EventPair key, Iterator<WorkflowRelationExt> values,
				OutputCollector<IntWritable, DependencyRelation> output,
				Reporter reporter) throws IOException {

			int forwardDirect = 0;
			int forwardLoopTwo = 0;
			int forwardSuccessor = 0;
			int occurrences = 0;
			int backwardDirect = 0;
			int backwardLoopTwo = 0;
			int backwardSuccessor = 0;

			while (values.hasNext()) {
				WorkflowRelationExt relation = values.next();
				boolean forward = relation.getDir();

				if (relation.getType() == WfRType.DIRECT) {
					if (forward) {
						forwardDirect += relation.getCount();
					} else {
						backwardDirect += relation.getCount();
					}
				} else if (relation.getType() == WfRType.LOOPTWO) {
					if (forward) {
						forwardLoopTwo += relation.getCount();
					} else {
						backwardLoopTwo += relation.getCount();
					}
				} else if (relation.getType() == WfRType.SUCCESSOR) {
					if (forward) {
						forwardSuccessor += relation.getCount();
					} else {
						backwardSuccessor += relation.getCount();
					}
				} else if (forward && relation.getType() == WfRType.COUNT) {
					// COUNT is only tallied when the direction flag is set.
					occurrences += relation.getCount();
				}
			}

			final IntWritable constantKey = new IntWritable(0);
			final boolean sameEvent = key.getEvent1().equals(key.getEvent2());

			if (sameEvent) {
				// Length-one loop measure.
				if (forwardDirect > 0) {
					emit(output, constantKey, key, false, WfRType.DIRECT,
							(float) forwardDirect / (float) (forwardDirect + 1));
				}
			} else {
				final float denominator = (float) (forwardDirect + backwardDirect + 1);

				final float forwardDep = (float) (forwardDirect - backwardDirect) / denominator;
				if (forwardDep > 0) {
					emit(output, constantKey, key, false, WfRType.DIRECT, forwardDep);
				}

				final float backwardDep = (float) (backwardDirect - forwardDirect) / denominator;
				if (backwardDep > 0) {
					emit(output, constantKey, key, true, WfRType.DIRECT, backwardDep);
				}
			}

			// Length-two loop measure, reported symmetrically.
			final float loopDep = (float) (forwardLoopTwo + backwardLoopTwo)
					/ (float) (forwardLoopTwo + backwardLoopTwo + 1);
			if (loopDep > 0) {
				emit(output, constantKey, key, false, WfRType.LOOPTWO, loopDep);
				emit(output, constantKey, key, true, WfRType.LOOPTWO, loopDep);
			}

			if (forwardSuccessor > 0) {
				emit(output, constantKey, key, false, WfRType.SUCCESSOR,
						(float) forwardSuccessor);
			}
			if (backwardSuccessor > 0) {
				emit(output, constantKey, key, true, WfRType.SUCCESSOR,
						(float) backwardSuccessor);
			}

			if (occurrences > 0) {
				emit(output, constantKey, key, false, WfRType.COUNT, (float) occurrences);
			}
		}

		/**
		 * Writes one dependency relation for the events of {@code pair}, in the
		 * stored order or, when {@code reversed} is set, with the events swapped.
		 */
		private static void emit(OutputCollector<IntWritable, DependencyRelation> output,
				IntWritable outKey, EventPair pair, boolean reversed,
				WfRType type, float measure) throws IOException {
			if (reversed) {
				output.collect(outKey, new DependencyRelation(pair.getEvent2(),
						pair.getEvent1(), type, measure));
			} else {
				output.collect(outKey, new DependencyRelation(pair.getEvent1(),
						pair.getEvent2(), type, measure));
			}
		}
	}
/*
	public static class Map3TestCase extends MapReduceBase implements
	Mapper<IntWritable, DependencyRelation, IntWritable, DependencyRelation> {

		public void map(IntWritable key, DependencyRelation value,
				OutputCollector<IntWritable, DependencyRelation> output,
				Reporter reporter) throws IOException {

			output.collect(new IntWritable(0), new DependencyRelation( "A", "B", WfRType.DIRECT, 0.998f ));
			output.collect(new IntWritable(0), new DependencyRelation( "A", "C", WfRType.DIRECT, 0.998f ));
			output.collect(new IntWritable(0), new DependencyRelation( "B", "C", WfRType.DIRECT, 0.031f ));
			output.collect(new IntWritable(0), new DependencyRelation( "B", "D", WfRType.DIRECT, 0.995f ));
			output.collect(new IntWritable(0), new DependencyRelation( "B", "E", WfRType.DIRECT, 0.995f ));
			output.collect(new IntWritable(0), new DependencyRelation( "B", "I", WfRType.DIRECT, 0.323f ));
			output.collect(new IntWritable(0), new DependencyRelation( "B", "J", WfRType.DIRECT, 0.084f ));
			output.collect(new IntWritable(0), new DependencyRelation( "C", "D", WfRType.DIRECT, 0.328f ));
			output.collect(new IntWritable(0), new DependencyRelation( "C", "E", WfRType.DIRECT, 0.272f ));
			output.collect(new IntWritable(0), new DependencyRelation( "C", "F", WfRType.DIRECT, 0.421f ));
			output.collect(new IntWritable(0), new DependencyRelation( "C", "G", WfRType.DIRECT, 0.492f ));
			output.collect(new IntWritable(0), new DependencyRelation( "C", "I", WfRType.DIRECT, 0.997f ));
			output.collect(new IntWritable(0), new DependencyRelation( "D", "F", WfRType.DIRECT, 0.650f ));
			output.collect(new IntWritable(0), new DependencyRelation( "D", "K", WfRType.DIRECT, 0.833f ));
			output.collect(new IntWritable(0), new DependencyRelation( "D", "L", WfRType.DIRECT, 0.300f ));
			output.collect(new IntWritable(0), new DependencyRelation( "E", "G", WfRType.DIRECT, 0.620f ));
			output.collect(new IntWritable(0), new DependencyRelation( "E", "L", WfRType.DIRECT, 0.167f ));
			output.collect(new IntWritable(0), new DependencyRelation( "F", "E", WfRType.DIRECT, 0.993f ));
			output.collect(new IntWritable(0), new DependencyRelation( "F", "H", WfRType.DIRECT, 0.997f ));
			output.collect(new IntWritable(0), new DependencyRelation( "F", "I", WfRType.DIRECT, 0.0842f ));
			output.collect(new IntWritable(0), new DependencyRelation( "F", "L", WfRType.DIRECT, 0.667f ));
			output.collect(new IntWritable(0), new DependencyRelation( "G", "D", WfRType.DIRECT, 0.993f ));
			output.collect(new IntWritable(0), new DependencyRelation( "G", "H", WfRType.DIRECT, 0.997f ));
			output.collect(new IntWritable(0), new DependencyRelation( "G", "I", WfRType.DIRECT, 0.0232f ));
			output.collect(new IntWritable(0), new DependencyRelation( "G", "L", WfRType.DIRECT, 0.400f ));
			output.collect(new IntWritable(0), new DependencyRelation( "H", "C", WfRType.DIRECT, 0.15f ));
			output.collect(new IntWritable(0), new DependencyRelation( "H", "I", WfRType.DIRECT, 0.205f ));
			output.collect(new IntWritable(0), new DependencyRelation( "H", "K", WfRType.DIRECT, 0.998f ));
			output.collect(new IntWritable(0), new DependencyRelation( "I", "D", WfRType.DIRECT, 0.040f ));
			output.collect(new IntWritable(0), new DependencyRelation( "I", "J", WfRType.DIRECT, 0.998f ));
			output.collect(new IntWritable(0), new DependencyRelation( "J", "D", WfRType.DIRECT, 0.328f ));
			output.collect(new IntWritable(0), new DependencyRelation( "J", "E", WfRType.DIRECT, 0.395f ));
			output.collect(new IntWritable(0), new DependencyRelation( "J", "F", WfRType.DIRECT, 0.073f ));
			output.collect(new IntWritable(0), new DependencyRelation( "J", "G", WfRType.DIRECT, 0.054f ));
			output.collect(new IntWritable(0), new DependencyRelation( "J", "H", WfRType.DIRECT, 0.058f ));
			output.collect(new IntWritable(0), new DependencyRelation( "J", "K", WfRType.DIRECT, 0.997f ));
			output.collect(new IntWritable(0), new DependencyRelation( "J", "L", WfRType.DIRECT, 0.833f ));
			output.collect(new IntWritable(0), new DependencyRelation( "L", "C", WfRType.DIRECT, 0.944f ));
			output.collect(new IntWritable(0), new DependencyRelation( "L", "H", WfRType.DIRECT, 0.267f ));
			
			output.collect(new IntWritable(0), new DependencyRelation( "I", "I", WfRType.DIRECT, 0.997f ));
			
			output.collect(new IntWritable(0), new DependencyRelation( "A", "A", WfRType.COUNT, 1000f ));
			output.collect(new IntWritable(0), new DependencyRelation( "B", "B", WfRType.COUNT, 1000f ));
			output.collect(new IntWritable(0), new DependencyRelation( "C", "C", WfRType.COUNT, 1036f ));
			output.collect(new IntWritable(0), new DependencyRelation( "D", "D", WfRType.COUNT, 921f ));
			output.collect(new IntWritable(0), new DependencyRelation( "E", "E", WfRType.COUNT, 998f ));
			output.collect(new IntWritable(0), new DependencyRelation( "F", "F", WfRType.COUNT, 908f ));
			output.collect(new IntWritable(0), new DependencyRelation( "G", "G", WfRType.COUNT, 998f ));
			output.collect(new IntWritable(0), new DependencyRelation( "H", "H", WfRType.COUNT, 987f ));
			output.collect(new IntWritable(0), new DependencyRelation( "I", "I", WfRType.COUNT, 2010f ));
			output.collect(new IntWritable(0), new DependencyRelation( "J", "J", WfRType.COUNT, 1036f ));
			output.collect(new IntWritable(0), new DependencyRelation( "K", "K", WfRType.COUNT, 1000f ));
			output.collect(new IntWritable(0), new DependencyRelation( "L", "L", WfRType.COUNT, 36f ));
			
			output.collect(new IntWritable(0), new DependencyRelation( "D", "F", WfRType.LOOPTWO, .995f ));
			output.collect(new IntWritable(0), new DependencyRelation( "F", "D", WfRType.LOOPTWO, .995f ));
			output.collect(new IntWritable(0), new DependencyRelation( "E", "G", WfRType.LOOPTWO, .996f ));
			output.collect(new IntWritable(0), new DependencyRelation( "G", "E", WfRType.LOOPTWO, .996f ));
			output.collect(new IntWritable(0), new DependencyRelation( "I", "B", WfRType.LOOPTWO, .950f ));
			output.collect(new IntWritable(0), new DependencyRelation( "I", "D", WfRType.LOOPTWO, .975f ));
			output.collect(new IntWritable(0), new DependencyRelation( "I", "E", WfRType.LOOPTWO, .984f ));
			output.collect(new IntWritable(0), new DependencyRelation( "I", "F", WfRType.LOOPTWO, .983f ));
			output.collect(new IntWritable(0), new DependencyRelation( "I", "G", WfRType.LOOPTWO, .983f ));
			output.collect(new IntWritable(0), new DependencyRelation( "I", "H", WfRType.LOOPTWO, .990f ));
			output.collect(new IntWritable(0), new DependencyRelation( "I", "I", WfRType.LOOPTWO, .996f ));
			
		}
	}
*/

	/**
	 * Stage 3 reducer: receives every {@link DependencyRelation} produced by
	 * stage 2 in a single reduce call and derives the dependency graph as a
	 * set of {@link EventPair}s, each emitted as both key and value.
	 *
	 * The graph is assembled from several intermediate sets (C1, C2, COut,
	 * CIn, COutPP, CInPP) built in the numbered steps below, plus
	 * long-distance pairs; the thresholds that control the construction are
	 * read from the job configuration in {@link #configure(JobConf)}.
	 */
	public static class Reduce3 extends MapReduceBase implements
			Reducer<IntWritable, DependencyRelation, EventPair, EventPair> {

		// Thresholds, overwritten from the job configuration in configure().
		// They are static, so every Reduce3 instance in the same JVM shares them.
		// DeltaA: absolute threshold on DIRECT dependency values (steps 6, 8, 10, 11).
		private static float DeltaA = 0;
		// DeltaL1L: threshold on the DIRECT value of a task with itself (step 2).
		private static float DeltaL1L = 0;
		// DeltaL2L: threshold on LOOPTWO values (step 3).
		private static float DeltaL2L = 0;
		// DeltaLong: threshold for the long-distance dependency test.
		private static float DeltaLong = 0;
		// DeltaRel: threshold on differences between two DIRECT values (steps 6, 8, 10, 11).
		private static float DeltaRel = 0;

		/**
		 * Reads the five thresholds from the configuration keys "DeltaA",
		 * "DeltaL1L", "DeltaL2L", "DeltaLong" and "DeltaRel".
		 *
		 * @param conf the job configuration
		 * @throws NullPointerException  if one of the keys is not set
		 *                               (Float.parseFloat receives null)
		 * @throws NumberFormatException if a value is not a parsable float
		 */
		public void configure(JobConf conf) {
			DeltaA = Float.parseFloat(conf.get("DeltaA"));
			DeltaL1L = Float.parseFloat(conf.get("DeltaL1L"));
			DeltaL2L = Float.parseFloat(conf.get("DeltaL2L"));
			DeltaLong = Float.parseFloat(conf.get("DeltaLong"));
			DeltaRel = Float.parseFloat(conf.get("DeltaRel"));
		}

		/**
		 * Loads all dependency relations into in-memory tables, builds the
		 * dependency graph and writes every pair of it to the output.
		 *
		 * @param key      the reduce key (not used)
		 * @param values   the dependency relations of all event pairs
		 * @param output   collector receiving each graph edge as (pair, pair)
		 * @param reporter progress reporter (not used)
		 * @throws IOException if the collector fails
		 */
		public void reduce(IntWritable key, Iterator<DependencyRelation> values,
				OutputCollector<EventPair, EventPair> output, Reporter reporter)
				throws IOException {

			// Step 1: load the input.
			// taskList: every event seen in either position of a relation (sorted, unique).
			// depMap / l2lMap / succMap: values of DIRECT / LOOPTWO / SUCCESSOR
			// relations indexed by (event1, event2); countMap: COUNT value per event1.
			// The Float(0) passed to TwoDimTable is presumably the value get()
			// returns for absent cells -- TODO confirm against TwoDimTable.
			TreeSet<Text> taskList = new TreeSet<Text>();
			TwoDimTable<Text, Text, Float> depMap = new TwoDimTable<Text, Text, Float>(new Float(0));
			TwoDimTable<Text, Text, Float> l2lMap = new TwoDimTable<Text, Text, Float>(new Float(0));
			TwoDimTable<Text, Text, Float> succMap = new TwoDimTable<Text, Text, Float>(new Float(0));
			HashMap<Text, Float> countMap = new HashMap<Text, Float>();

			while (values.hasNext()) {
				DependencyRelation depRel = values.next();

				// NOTE(review): the objects returned by getEvent1()/getEvent2() are
				// stored without copying. If the framework reuses the value instance
				// between next() calls and these getters expose its internal Text
				// fields, the stored entries would alias -- verify against
				// DependencyRelation.
				taskList.add(depRel.getEvent1());
				taskList.add(depRel.getEvent2());
				if (depRel.getType() == WfRType.DIRECT) {
					depMap.put(depRel.getEvent1(), depRel.getEvent2(),
							depRel.getDep());
				}
				if (depRel.getType() == WfRType.LOOPTWO) {
					l2lMap.put(depRel.getEvent1(), depRel.getEvent2(),
							depRel.getDep());
				}
				if (depRel.getType() == WfRType.SUCCESSOR) {
					succMap.put(depRel.getEvent1(), depRel.getEvent2(),
							depRel.getDep());
				}
				if (depRel.getType() == WfRType.COUNT) {
					countMap.put(depRel.getEvent1(), depRel.getDep());
				}
			}
			int nTasks = taskList.size();
			Text[] tasks = taskList.toArray(new Text[0]);

			// Step 2:
			// Construct set C1: pairs (t, t) whose DIRECT value with itself
			// reaches DeltaL1L
			Set<EventPair> C1 = new TreeSet<EventPair>();
			for (int i = 0; i < nTasks; i++) {
				if (depMap.get(tasks[i], tasks[i]) >= DeltaL1L)
					C1.add(new EventPair(tasks[i], tasks[i]));
			}

			// Step 3:
			// Construct set C2: pairs (a, b) where neither a nor b has a DIRECT
			// value with itself and the LOOPTWO value of (a, b) reaches DeltaL2L
			Set<EventPair> C2 = new TreeSet<EventPair>();
			for (int i = 0; i < nTasks; i++) {
				for (int j = 0; j < nTasks; j++) {
					if (depMap.get(tasks[i], tasks[i]) == 0
							&& depMap.get(tasks[j], tasks[j]) == 0
							&& l2lMap.get(tasks[i], tasks[j]) >= DeltaL2L)
						C2.add(new EventPair(tasks[i], tasks[j]));
				}
			}

			// Step 4:
			// Construct set COut: for each task, the pair(s) (task, follower)
			// with the largest DIRECT value; ties are all kept
			Set<EventPair> COut = new TreeSet<EventPair>();
			// For each task
			for (int i = 0; i < nTasks; i++) {
				// Find strongest follower
				// Float.MIN_VALUE is the smallest positive float, not the most
				// negative one, so a value of exactly 0 never becomes the maximum
				// and such a task contributes no pair here.
				// NOTE(review): confirm this is intended before changing the seed.
				float maxval = Float.MIN_VALUE;
				HashSet<Integer> maxind = new HashSet<Integer>();
				for (int j = 0; j < nTasks; j++) {
					if (i != j) {
						if (depMap.get(tasks[i], tasks[j]) > maxval) {
							maxval = depMap.get(tasks[i], tasks[j]);
							maxind.clear(); maxind.add(j);
						}
						if (depMap.get(tasks[i], tasks[j]) == maxval) {
							maxind.add(j);
						}
					}
				}
				for (Integer j : maxind) {
					COut.add(new EventPair(tasks[i], tasks[j]));
				}
			}

			// Step 5:
			// Construct set CIn: for each task, the pair(s) (cause, task) with
			// the largest DIRECT value; ties are all kept
			Set<EventPair> CIn = new TreeSet<EventPair>();
			// For each task
			for (int i = 0; i < nTasks; i++) {
				// Find strongest cause
				// Same Float.MIN_VALUE seed as in step 4: zero values never win.
				float maxval = Float.MIN_VALUE;
				HashSet<Integer> maxind = new HashSet<Integer>();
				for (int j = 0; j < nTasks; j++) {
					if (i != j) {
						if (depMap.get(tasks[j], tasks[i]) > maxval) {
							maxval = depMap.get(tasks[j], tasks[i]);
							maxind.clear(); maxind.add(j);
						}
						if (depMap.get(tasks[j], tasks[i]) == maxval) {
							maxind.add(j);
						}
					}
				}
				for (Integer j : maxind) {
					CIn.add(new EventPair(tasks[j], tasks[i]));
				}
			}

			// Step 6:
			// Construct set COutPrime: pairs (a, x) of COut below DeltaA for
			// which C2 contains (a, b) and COut contains some (b, y) whose DIRECT
			// value exceeds that of (a, x) by more than DeltaRel
			Set<EventPair> COutPrime = new TreeSet<EventPair>();
			for (EventPair ep_ax : COut) {
				if (depMap.get(ep_ax.getEvent1Text(), ep_ax.getEvent2Text()) < DeltaA) {
					for (EventPair ep_by : COut) {
						for (EventPair ep_ab : C2) {
							if (ep_ax.getEvent1Text().equals(
									ep_ab.getEvent1Text())
									&& ep_by.getEvent1Text().equals(
											ep_ab.getEvent2Text())
									&& (depMap.get(ep_by.getEvent1Text(),
											ep_by.getEvent2Text())
											- depMap.get(ep_ax.getEvent1Text(),
													ep_ax.getEvent2Text()) > DeltaRel)) {
								COutPrime.add(ep_ax); 
								// leaves only the innermost loop over C2
								break;
							}
						}
					}
				}
			}
			// Step 7:
			// Remove COutPrime from COut
			COut.removeAll(COutPrime);

			// Step 8:
			// Construct set CInPrime: the counterpart of step 6 for CIn, matching
			// on the second event of each pair
			Set<EventPair> CInPrime = new TreeSet<EventPair>();
			for (EventPair ep_xa : CIn) {
				if (depMap.get(ep_xa.getEvent1Text(), ep_xa.getEvent2Text()) < DeltaA) {
					for (EventPair ep_yb : CIn) {
						for (EventPair ep_ab : C2) {

							if (ep_xa.getEvent2Text().equals(
									ep_ab.getEvent1Text())
									&& ep_yb.getEvent2Text().equals(
											ep_ab.getEvent2Text())
									&& (depMap.get(ep_yb.getEvent1Text(),
											ep_yb.getEvent2Text())
											- depMap.get(ep_xa.getEvent1Text(),
													ep_xa.getEvent2Text()) > DeltaRel)) {
								CInPrime.add(ep_xa); 
								// leaves only the innermost loop over C2
								break;
							}
						}
					}
				}
			}
			// Step 9:
			// Remove CInPrime from CIn
			CIn.removeAll(CInPrime);

			// Step 10:
			// Construct set COutPP: every pair whose DIRECT value reaches DeltaA,
			// plus pairs (i, j) whose value is within DeltaRel of a COut pair
			// starting at task i
			Set<EventPair> COutPP = new TreeSet<EventPair>();
			for (int i = 0; i < nTasks; i++) {
				for (int j = 0; j < nTasks; j++) {
					if (depMap.get(tasks[i], tasks[j]) >= DeltaA) {
						COutPP.add(new EventPair(tasks[i], tasks[j]));
					}
					for (EventPair ep : COut) {
						// NOTE(review): tasks[i] is a Text. Other code in this file
						// treats getEvent1() as a String and uses getEvent1Text() for
						// Text comparisons; if getEvent1() does return a String this
						// equals() is always false and the branch never fires --
						// verify against EventPair.
						if (ep.getEvent1().equals(tasks[i])
								&& (depMap.get(ep.getEvent1Text(), ep.getEvent2Text()) - depMap.get(tasks[i], tasks[j])) < DeltaRel) {
							COutPP.add(new EventPair(tasks[i], tasks[j]));
							break;
						}
					}
				}
			}

			// Step 11:
			// Construct set CInPP: same conditions as step 10 but iterating CIn.
			// NOTE(review): the test still matches the FIRST event of the CIn pair
			// against tasks[i], exactly as step 10 does for COut -- confirm this
			// is the intended direction for incoming pairs. The String/Text
			// concern noted in step 10 applies here as well.
			Set<EventPair> CInPP = new TreeSet<EventPair>();
			for (int i = 0; i < nTasks; i++) {
				for (int j = 0; j < nTasks; j++) {
					if (depMap.get(tasks[i], tasks[j]) >= DeltaA) {
						CInPP.add(new EventPair(tasks[i], tasks[j]));
					}
					for (EventPair ep : CIn) {
						if (ep.getEvent1().equals(tasks[i])
								&& (depMap.get(ep.getEvent1Text(), ep.getEvent2Text()) - depMap.get(tasks[i], tasks[j])) < DeltaRel) {
							CInPP.add(new EventPair(tasks[i], tasks[j]));
							break;
						}
					}
				}
			}

			// Step 12:
			// Construct the Dependency Graph DG
			Set<EventPair> DG = new TreeSet<EventPair>();
			DG.addAll(C1);
			DG.addAll(C2);
// This may have been forgotten in Step 12 of Definition 6 in the IEEE (2011) paper?
			DG.addAll(CIn);
			DG.addAll(COut);
// end forgotten
			DG.addAll(COutPP);
			DG.addAll(CInPP);

			// Deal with Long Distance Dependencies:
			// add (a, b) when
			//   2 * succ(a, b) / (count(a) + count(b) + 1)
			//     - 2 * |count(a) - count(b)| / (count(a) + count(b) + 1)
			// exceeds DeltaLong
			for (int i = 0; i < nTasks; i++) {
				for (int j = 0; j < nTasks; j++) {
					Float countSucc = succMap.get(tasks[i], tasks[j]);
					if (countSucc != null) {
						// countMap.get() is unboxed here; a task without a COUNT
						// record would raise a NullPointerException.
						float countA = countMap.get(tasks[i]);
						float countB = countMap.get(tasks[j]);
						if (2
								* countSucc
								/ (countA + countB + 1)
								- (2 * Math.abs(countA - countB) / (countA
										+ countB + 1)) > DeltaLong)
							DG.add(new EventPair(tasks[i], tasks[j]));
					}
				}
			}

			// Send the DG to output
			for (EventPair ep : DG) {
				output.collect(ep, ep);
			}

		}
	}

	/**
	 * Mapper that converts a tab-separated log line
	 * ({@code caseId <TAB> event <TAB> timestamp}) into a
	 * ({@link CaseID}, {@link EventTimestampPair}) record; lines without
	 * exactly three tab-separated tokens are ignored.
	 *
	 * It performs the same conversion as {@code Map1}; the {@code main()}
	 * method of this file configures stage 4 with {@code Map1}.
	 */
	public static class Map4 extends MapReduceBase implements
			Mapper<LongWritable, Text, CaseID, EventTimestampPair> {

		// Both output objects are reused for every record.
		private EventTimestampPair pairOut = new EventTimestampPair();
		private CaseID idOut = new CaseID();

		/**
		 * Emits the parsed record for one log line.
		 *
		 * @param key      input key supplied by the input format (not used)
		 * @param value    one line of the event log
		 * @param output   collector receiving the (case id, event/timestamp) pair
		 * @param reporter progress reporter (not used)
		 * @throws IOException if the collector fails
		 */
		public void map(LongWritable key, Text value,
				OutputCollector<CaseID, EventTimestampPair> output,
				Reporter reporter) throws IOException {
			final StringTokenizer parts = new StringTokenizer(value.toString(), "\t");
			final boolean wellFormed = parts.countTokens() == 3;

			if (wellFormed) {
				final String caseField = parts.nextToken();
				idOut.set(new Text(caseField));

				final String eventField = parts.nextToken();
				final String stampField = parts.nextToken();
				pairOut.set(eventField, Timestamp.valueOf(stampField));

				output.collect(idOut, pairOut);
			}
		}
	}

	/**
	 * Stage 4 reducer: replays the ordered trace of one case against the
	 * dependency graph written by stage 3 and counts, per event, the sets of
	 * graph successors (SUCC) and graph predecessors (PRED) that the trace
	 * actually connects it to.
	 *
	 * The dependency graph is read once per JVM from
	 * {@code <DGPath>/part-00000} and cached in a static field.
	 */
	public static class Reduce4 extends MapReduceBase implements
			Reducer<CaseID, EventTimestampPair, CNetAugmentation, IntWritable> {

		// Dependency graph shared by all Reduce4 instances of this JVM.
		private static DependencyGraph dg;
		// Directory that holds the stage 3 output, from configuration key "DGPath".
		private static String dgpath;
		// Per-case tally of augmentations; rebuilt on every reduce() call.
		HashMap<CNetAugmentation, Integer> cnetaugs;
		Hashtable<String, WorkflowRelationExt> map;
		
		/**
		 * Remembers the dependency graph location given by configuration key "DGPath".
		 *
		 * @param conf the job configuration
		 */
		public void configure(JobConf conf) {
			dgpath = conf.get("DGPath");
		}		

		/**
		 * Loads the dependency graph from {@code <DGPath>/part-00000} unless it
		 * has already been loaded. While the file does not exist the method
		 * polls once per second; if the thread is interrupted during that wait
		 * the wait is abandoned (previously the interrupt flag was restored
		 * inside the loop, which made every following sleep fail immediately
		 * and turned the loop into a busy spin).
		 *
		 * Failures are reported on standard output and leave the graph with
		 * whatever edges were read up to that point.
		 * NOTE(review): a job then continues with an incomplete graph; consider
		 * failing the task instead.
		 */
		public void readDG() {
System.out.println("readDG for Reduce4 Called");
			if (dg != null) {
				return;
			}

			dg = new DependencyGraph();
System.out.println("Created new DG");			
			EventPair key = new EventPair();
			EventPair val = new EventPair();

			SequenceFile.Reader reader = null;
			try {
				URI dgUri = new URI(dgpath + "/part-00000");
				Configuration fsConf = new Configuration();
				FileSystem fs = FileSystem.get(dgUri, fsConf);
				Path p = new Path(dgUri);
				while (!fs.exists(p)) {
					try {
						System.out.println("Going to sleep");
						Thread.sleep(1000);
					} catch (InterruptedException ex) {
						// Keep the interrupt visible to the caller and stop waiting.
						Thread.currentThread().interrupt();
						throw new IOException(
								"Interrupted while waiting for dependency graph file " + p, ex);
					}
				}
				reader = new SequenceFile.Reader(fs, p, fsConf);
				while (reader.next(key, val)) {
System.out.println("EventPair: " + key.toString());
					dg.addSucc(key.getEvent1(), key.getEvent2());
					dg.addPred(key.getEvent2(), key.getEvent1());
				}
			} catch (Exception e) {
				e.printStackTrace(System.out);
			} finally {
				IOUtils.closeStream(reader);
			}
		}		

		/**
		 * Orders the events of one case and emits, for every augmentation
		 * found in the trace, how often it occurred in this case.
		 *
		 * @param key      the case id (not used beyond grouping)
		 * @param values   the events of the case; each one is copied before it is stored
		 * @param output   collector receiving (augmentation, count) records
		 * @param reporter progress reporter (not used)
		 * @throws IOException if the collector fails
		 */
		public void reduce(CaseID key, Iterator<EventTimestampPair> values,
				OutputCollector<CNetAugmentation, IntWritable> output,
				Reporter reporter) throws IOException {

			readDG();

			// read it all into memory, sorted by timestamp and events
			SortedMap<EventTimestampPair, String> timeSorted = new TreeMap<EventTimestampPair, String>();
			while (values.hasNext()) {
				EventTimestampPair etPair = new EventTimestampPair(values.next());
				timeSorted.put(etPair, etPair.getEvent());
			}
			// this is now the time-sorted trace
			ArrayList<String> trace = new ArrayList<String>( timeSorted.values() );
			// this will hold the list of tuples representing the augmented CNet
			cnetaugs = new HashMap<CNetAugmentation, Integer>();
			
			// Output side: which graph successors does each event lead to?
			for (int current = 0; current < trace.size()-1; current++) {
				String ev1 = trace.get(current);
				Set<Text> candidates = new HashSet<Text>();
				// Rest of the trace after the current event
				List<String> rest = trace.subList(current+1, trace.size());
				for (String s: dg.getSucc(ev1)) {
					// first later occurrence of the successor, if any
					int succind = rest.indexOf(s);
					if (succind < 0) {
						continue;
					}
					// The successor counts only if none of its graph predecessors
					// occurs between the current event and the successor.
					Set<String> preds = new HashSet<String>(dg.getPred(s));
					if (Collections.disjoint(preds, rest.subList(0, succind))) {
						candidates.add(new Text(s));
					}
				}
				
				if (candidates.size() > 0) {
					addToOutput(ev1, candidates.toArray(new Text[0]), CNetType.SUCC);
				}
			}

			// Input side: which graph predecessors does each event come from?
			for (int current = 1; current < trace.size(); current++) {
				String ev1 = trace.get(current);
				Set<Text> candidates = new HashSet<Text>();
				// Beginning of the trace before the current event
				List<String> rest = trace.subList(0, current);
				for (String p: dg.getPred(ev1)) {
					// most recent earlier occurrence of the predecessor, if any
					int predind = rest.lastIndexOf(p);
					if (predind < 0) {
						continue;
					}
					// The predecessor counts only if none of its graph successors
					// occurs between the predecessor and the current event.
					Set<String> succs = new HashSet<String>(dg.getSucc(p));
					if (Collections.disjoint(succs, rest.subList(predind+1, rest.size()))) {
						candidates.add(new Text(p));
					}
				}
				
				if (candidates.size() > 0) {
					addToOutput(ev1, candidates.toArray(new Text[0]), CNetType.PRED);
				}
			}

			for (Entry<CNetAugmentation, Integer> cni : cnetaugs.entrySet()) {
				output.collect(cni.getKey(), new IntWritable(cni.getValue()));
			}
		}

		/**
		 * Adds one occurrence of the augmentation (event, related events,
		 * direction) to the per-case tally.
		 *
		 * @param ev1 the event the augmentation belongs to
		 * @param ev2 the related events
		 * @param dir whether {@code ev2} are successors or predecessors
		 */
		private void addToOutput(String ev1, Text[] ev2, CNetType dir) {
			CNetAugmentation cnetaug = new CNetAugmentation(ev1, ev2, dir);
			Integer count = cnetaugs.get(cnetaug);
			cnetaugs.put(cnetaug, Integer.valueOf(count == null ? 1 : count + 1));
		}

	}
	
	/**
	 * Stage 5 combiner and reducer: adds up the per-case counts of one
	 * {@link CNetAugmentation} and emits the augmentation with its total.
	 */
	public static class Reduce5 extends MapReduceBase implements
	Reducer<CNetAugmentation, IntWritable, CNetAugmentation, IntWritable> {

		/**
		 * Sums all counts received for {@code key}.
		 *
		 * @param key      the augmentation being counted
		 * @param values   partial counts for that augmentation
		 * @param output   collector receiving (augmentation, total)
		 * @param reporter progress reporter (not used)
		 * @throws IOException if the collector fails
		 */
		public void reduce(CNetAugmentation key, Iterator<IntWritable> values,
				OutputCollector<CNetAugmentation, IntWritable> output,
				Reporter reporter) throws IOException {

			int sum = 0;
			for (Iterator<IntWritable> partials = values; partials.hasNext();) {
				sum += partials.next().get();
			}

			output.collect(key, new IntWritable(sum));
		}
	}


	
	/**
	 * Runs the five MapReduce stages one after another, each reading the
	 * output directory of an earlier stage.
	 *
	 * @param args positional arguments:
	 *             <ol start="0">
	 *             <li>input path of the event log (read by stages 1 and 4)</li>
	 *             <li>output path of stage 1 / input of stage 2</li>
	 *             <li>output path of stage 2 / input of stage 3</li>
	 *             <li>output path of stage 3 (dependency graph, read by stage 4)</li>
	 *             <li>output path of stage 4 / input of stage 5</li>
	 *             <li>output path of stage 5 (text output)</li>
	 *             <li>optional DeltaA (default 0.9)</li>
	 *             <li>optional DeltaL1L (default 0.9)</li>
	 *             <li>optional DeltaL2L (default 0.9)</li>
	 *             <li>optional DeltaLong (default 0.9)</li>
	 *             <li>optional DeltaRel (default 0.05)</li>
	 *             </ol>
	 * @throws IllegalArgumentException if fewer than six arguments are given
	 * @throws Exception                if a job fails or a path is invalid
	 */
	public static void main(String[] args) throws Exception {
		// Fail before any job is started instead of part-way through the chain.
		if (args.length < 6) {
			throw new IllegalArgumentException(
					"Expected at least 6 arguments but got " + args.length
					+ ". Usage: heuristic <input> <out1> <out2> <out3> <out4> <out5>"
					+ " [DeltaA [DeltaL1L [DeltaL2L [DeltaLong [DeltaRel]]]]]");
		}

		JobConf conf1 = new JobConf(heuristic.class);
		conf1.setJobName("HeuristicStage1");

		conf1.setMapOutputKeyClass(CaseID.class);
		conf1.setMapOutputValueClass(EventTimestampPair.class);
		conf1.setOutputKeyClass(EventPair.class);
		conf1.setOutputValueClass(WorkflowRelationExt.class);

		conf1.setMapperClass(Map1.class);
		conf1.setReducerClass(Reduce1.class);

		conf1.setInputFormat(TextInputFormat.class);
		conf1.setOutputFormat(SequenceFileOutputFormat.class);
		FileInputFormat.setInputPaths(conf1, new Path(args[0]));
		FileOutputFormat.setOutputPath(conf1, new Path(args[1]));

		JobClient.runJob(conf1);

		// Phase 2 follows
		JobConf conf2 = new JobConf(heuristic.class);
		conf2.setJobName("HeuristicStage2");

		conf2.setMapOutputKeyClass(EventPair.class);
		conf2.setMapOutputValueClass(WorkflowRelationExt.class);
		conf2.setOutputKeyClass(IntWritable.class);
		conf2.setOutputValueClass(DependencyRelation.class);

		conf2.setCombinerClass(Combine2.class);
		conf2.setReducerClass(Reduce2.class);

		conf2.setInputFormat(SequenceFileInputFormat.class);
		conf2.setOutputFormat(SequenceFileOutputFormat.class);
		FileInputFormat.setInputPaths(conf2, new Path(args[1]));
		FileOutputFormat.setOutputPath(conf2, new Path(args[2]));

		JobClient.runJob(conf2);

		// Phase 3 follows
		// This runs on a single reducer so that it sees every dependency relation
		JobConf conf3 = new JobConf(heuristic.class);
		conf3.setJobName("HeuristicStage3");

		// Optional thresholds; each falls back to its default when not supplied.
		setThreshold(conf3, "DeltaA", args, 6, 0.9f);
		setThreshold(conf3, "DeltaL1L", args, 7, 0.9f);
		setThreshold(conf3, "DeltaL2L", args, 8, 0.9f);
		setThreshold(conf3, "DeltaLong", args, 9, 0.9f);
		setThreshold(conf3, "DeltaRel", args, 10, 0.05f);

		conf3.setMapOutputKeyClass(IntWritable.class);
		conf3.setMapOutputValueClass(DependencyRelation.class);
		conf3.setOutputKeyClass(EventPair.class);
		conf3.setOutputValueClass(EventPair.class);

//		conf3.setMapperClass(Map3TestCase.class);
		// Set a single reduce task that collects everything
		conf3.setNumReduceTasks(1);
		conf3.setReducerClass(Reduce3.class);

		conf3.setInputFormat(SequenceFileInputFormat.class);
		conf3.setOutputFormat(SequenceFileOutputFormat.class);
		FileInputFormat.setInputPaths(conf3, new Path(args[2]));
		FileOutputFormat.setOutputPath(conf3, new Path(args[3]));
		
		JobClient.runJob(conf3);

		// Phase 4 follows
		JobConf conf4 = new JobConf(heuristic.class);
		conf4.setJobName("HeuristicStage4");
		// Reduce4 reads the stage 3 output from this location.
		conf4.setStrings("DGPath", args[3]);
		DistributedCache.addCacheFile(new URI(args[3] + "/part-00000"), conf4);
		conf4.setMapOutputKeyClass(CaseID.class);
		conf4.setMapOutputValueClass(EventTimestampPair.class);
		conf4.setMapperClass(Map1.class);

		conf4.setOutputKeyClass(CNetAugmentation.class);
		conf4.setOutputValueClass(IntWritable.class);
		conf4.setReducerClass(Reduce4.class);
		
		FileInputFormat.setInputPaths(conf4, new Path(args[0]));
		FileOutputFormat.setOutputPath(conf4, new Path(args[4]));

		conf4.setOutputFormat(SequenceFileOutputFormat.class);
		JobClient.runJob(conf4);

		// Phase 5 follows
		JobConf conf5 = new JobConf(heuristic.class);
		conf5.setJobName("HeuristicStage5");

		conf5.setMapOutputKeyClass(CNetAugmentation.class);
		conf5.setMapOutputValueClass(IntWritable.class);
		conf5.setOutputKeyClass(CNetAugmentation.class);
		conf5.setOutputValueClass(IntWritable.class);
		conf5.setCombinerClass(Reduce5.class);
		// Set a single reduce task that collects everything
		conf5.setNumReduceTasks(1);
		conf5.setReducerClass(Reduce5.class);
		conf5.setInputFormat(SequenceFileInputFormat.class);
		FileInputFormat.setInputPaths(conf5, new Path(args[4]));
		conf5.setOutputFormat(TextOutputFormat.class);
		FileOutputFormat.setOutputPath(conf5, new Path(args[5]));

		JobClient.runJob(conf5);
	}

	/**
	 * Stores a float threshold in {@code conf}: the value parsed from
	 * {@code args[index]} when that argument exists, otherwise {@code fallback}.
	 */
	private static void setThreshold(JobConf conf, String name, String[] args,
			int index, float fallback) {
		float value = args.length > index ? Float.parseFloat(args[index]) : fallback;
		conf.setFloat(name, value);
	}
	
// Helpers to print an array	
	/**
	 * Concatenates the string forms of the elements of {@code a}, writing
	 * {@code div} after every element, including the last one.
	 *
	 * @param a   the elements to print; a {@code null} element prints as "null"
	 * @param div the text appended after each element
	 * @return the concatenation, or the empty string for an empty array
	 */
	static String ArrayToString(Object[] a, String div) {
		// StringBuilder avoids re-copying the growing result on every element.
		StringBuilder joined = new StringBuilder();
		for (Object element : a) {
			joined.append(element).append(div);
		}
		return joined.toString();
	}

	/**
	 * Same as {@link #ArrayToString(Object[], String)} with {@code " // "} as
	 * the text appended after each element.
	 *
	 * @param a the elements to print
	 * @return the concatenation, or the empty string for an empty array
	 */
	static String ArrayToString(Object[] a) {
		return ArrayToString(a, " // ");
	}
}