/* 
mrAlpha - A Map-Reduce based implementation of the alpha workflow discovery algorithm
Copyright (C) 2013-2014  Joerg Evermann

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
*/

package mrAlpha;

import java.io.IOException;
import java.sql.Timestamp;
import java.util.*;

import mrAlpha.WorkflowRelation.WfRType;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;

public class alpha {

	/**
	 * Stage 1 mapper: turns one tab-separated input line into a
	 * (case id, event/timestamp) record.
	 *
	 * A line must consist of exactly three tab-separated tokens. The first
	 * token becomes the output key, the second and third tokens become the
	 * event and timestamp of the output value. The timestamp token must be in
	 * the format accepted by {@link Timestamp#valueOf(String)}.
	 *
	 * Lines that do not have exactly three tokens, or whose timestamp cannot be
	 * parsed, are skipped. Skipped lines are recorded in job counters so that
	 * dropped input is visible in the job report instead of vanishing silently.
	 */
	public static class Map1 extends MapReduceBase implements
			Mapper<LongWritable, Text, CaseID, EventTimestampPair> {

		// Counter group and counter names used for skipped input lines.
		private static final String COUNTER_GROUP = "mrAlpha";
		private static final String COUNTER_WRONG_FIELD_COUNT = "SKIPPED_WRONG_FIELD_COUNT";
		private static final String COUNTER_BAD_TIMESTAMP = "SKIPPED_BAD_TIMESTAMP";

		// Output objects are reused across map() calls to avoid per-record allocation.
		private EventTimestampPair etPair = new EventTimestampPair();
		private CaseID caseID = new CaseID();

		/**
		 * Parses a single log line and emits it keyed by its case id.
		 *
		 * @param key      position of the line in the input (unused)
		 * @param value    the raw input line
		 * @param output   collector receiving the (case id, event/timestamp) record
		 * @param reporter used only to count skipped lines; may be null
		 * @throws IOException if the collector fails to write the record
		 */
		public void map(LongWritable key, Text value,
				OutputCollector<CaseID, EventTimestampPair> output,
				Reporter reporter) throws IOException {
			// StringTokenizer treats a run of tabs as one delimiter, so empty
			// fields do not count as tokens.
			StringTokenizer tokenizer = new StringTokenizer(value.toString(), "\t");

			if (tokenizer.countTokens() != 3) {
				countSkipped(reporter, COUNTER_WRONG_FIELD_COUNT);
				return;
			}

			String caseToken = tokenizer.nextToken();
			String eventToken = tokenizer.nextToken();
			String timestampToken = tokenizer.nextToken();

			Timestamp timestamp;
			try {
				timestamp = Timestamp.valueOf(timestampToken);
			} catch (IllegalArgumentException e) {
				// A single malformed timestamp must not fail the whole task;
				// treat it like any other malformed line.
				countSkipped(reporter, COUNTER_BAD_TIMESTAMP);
				return;
			}

			caseID.set(new Text(caseToken));
			etPair.set(eventToken, timestamp);
			output.collect(caseID, etPair);
		}

		// Increments the given skip counter when a reporter is available.
		private static void countSkipped(Reporter reporter, String counter) {
			if (reporter != null) {
				reporter.incrCounter(COUNTER_GROUP, counter, 1L);
			}
		}
	}

	/**
	 * Stage 1 reducer: for one case, derives the "directly follows" relation
	 * between the events of that case and emits it for every pair of distinct
	 * event names seen in the case.
	 *
	 * All values of the case are held in memory, both ordered by
	 * EventTimestampPair's natural ordering and as a sorted set of unique event
	 * names. The output size is quadratic in the number of unique event names.
	 *
	 * NOTE(review): follows[r][c] is set when event r is observed directly
	 * after event c, and the record for the sorted pair (a, b) carries
	 * follows[a][b] with the second flag true and follows[b][a] with the second
	 * flag false. Confirm against FollowsRelation / WorkflowRelation that this
	 * orientation is the intended one for the later stages.
	 */
	public static class Reduce1 extends MapReduceBase implements
			Reducer<CaseID, EventTimestampPair, EventPair, FollowsRelation> {

		/**
		 * Builds and emits the follows relation of a single case.
		 *
		 * @param key      the case id (not used in the computation)
		 * @param values   all event/timestamp pairs recorded for the case
		 * @param output   collector receiving (event pair, follows relation) records
		 * @param reporter unused
		 * @throws IOException if the collector fails to write a record
		 */
		public void reduce(CaseID key, Iterator<EventTimestampPair> values,
				OutputCollector<EventPair, FollowsRelation> output,
				Reporter reporter) throws IOException {

			// read it all into memory, sorted by timestamp and events
			SortedMap<EventTimestampPair, String> timeSorted = new TreeMap<EventTimestampPair, String>();
			SortedSet<String> events = new TreeSet<String>();

			while (values.hasNext()) {
				// Copy each value so every map key is a distinct object
				// (the framework's iterator may hand back a reused instance).
				EventTimestampPair etPair = new EventTimestampPair(values.next());
				timeSorted.put(etPair, etPair.getEvent());
				events.add(etPair.getEvent());
			}

			// Sorted array of unique event names; an event's position in this
			// array is its row/column index in the follows matrix.
			String[] eventArray = events.toArray(new String[events.size()]);
			int numUniqueEvents = eventArray.length;

			boolean[][] follows = new boolean[numUniqueEvents][numUniqueEvents];

			// Walk the events in sorted order; each event directly follows its
			// predecessor. The predecessor's index is carried forward so every
			// event is looked up only once.
			Iterator<String> trace = timeSorted.values().iterator();
			if (trace.hasNext()) {
				int prevIdx = Arrays.binarySearch(eventArray, trace.next());
				while (trace.hasNext()) {
					int currIdx = Arrays.binarySearch(eventArray, trace.next());
					follows[currIdx][prevIdx] = true;
					prevIdx = currIdx;
				}
			}

			// Output objects are reused for every record.
			FollowsRelation fr = new FollowsRelation();
			EventPair ep = new EventPair();

			for (int i = 0; i < numUniqueEvents; i++) {
				for (int j = 0; j < numUniqueEvents; j++) {
					String ev1 = eventArray[i];
					String ev2 = eventArray[j];
					// The key always lists the smaller event name first; the
					// second argument of fr.set records which of the two
					// matrix cells the flag came from.
					if (i < j) {
						ep.set(ev1, ev2);
						fr.set(follows[i][j], true);
					} else {
						ep.set(ev2, ev1);
						fr.set(follows[i][j], false);
					}
					output.collect(ep, fr);
					// A self-pair has only one matrix cell, so it is emitted
					// a second time with the other direction flag.
					if (i == j) {
						ep.set(ev2, ev1);
						fr.set(follows[i][j], true);
						output.collect(ep, fr);
					}
				}
			}
		}
	}

	/**
	 * Stage 2 combiner: removes duplicate follows relations for one event pair.
	 *
	 * The values are classified by their string form ("F+", "NF+", "F-",
	 * "NF-"). Each form that occurred at least once is re-emitted exactly once
	 * for the key, so the output holds at most four records per event pair.
	 */
	public static class Combine2 extends MapReduceBase implements 
			Reducer<EventPair, FollowsRelation, EventPair, FollowsRelation> {

		/**
		 * Collapses the follows relations of one event pair to their distinct forms.
		 *
		 * @param key      the event pair
		 * @param values   follows relations recorded for the pair
		 * @param output   collector receiving the de-duplicated relations
		 * @param reporter unused
		 * @throws IOException if the collector fails to write a record
		 */
		public void reduce (EventPair key, Iterator<FollowsRelation> values,
				OutputCollector<EventPair, FollowsRelation> output,
				Reporter reporter) throws IOException {

			boolean followsPlus = false;
			boolean notFollowsPlus = false;
			boolean followsMinus = false;
			boolean notFollowsMinus = false;

			while (values.hasNext()) {
				String tag = values.next().toString();
				followsPlus |= tag.equals("F+");
				notFollowsPlus |= tag.equals("NF+");
				followsMinus |= tag.equals("F-");
				notFollowsMinus |= tag.equals("NF-");
			}

			// Emit in a fixed order: F+, NF+, F-, NF-.
			if (followsPlus) {
				output.collect(key, new FollowsRelation(true, true));
			}
			if (notFollowsPlus) {
				output.collect(key, new FollowsRelation(false, true));
			}
			if (followsMinus) {
				output.collect(key, new FollowsRelation(true, false));
			}
			if (notFollowsMinus) {
				output.collect(key, new FollowsRelation(false, false));
			}
		}
	}

	/**
	 * Stage 2 reducer: turns the follows relations of one event pair into a
	 * single workflow relation.
	 *
	 * Only the string forms "F+" and "F-" are significant; every other value
	 * is ignored. Exactly one record is emitted per key:
	 * "F+" only yields CAUSAL for the pair as given, "F-" only yields CAUSAL
	 * for the pair with its events swapped, both yield PARALLEL, and neither
	 * yields UNRELATED.
	 */
	public static class Reduce2 extends MapReduceBase implements 
			Reducer<EventPair, FollowsRelation, EventPair, WorkflowRelation> {

		/**
		 * Classifies one event pair as causal, parallel or unrelated.
		 *
		 * @param key      the event pair
		 * @param values   follows relations recorded for the pair
		 * @param output   collector receiving the resulting workflow relation
		 * @param reporter unused
		 * @throws IOException if the collector fails to write the record
		 */
		public void reduce (EventPair key, Iterator<FollowsRelation> values,
				OutputCollector<EventPair, WorkflowRelation> output,
				Reporter reporter) throws IOException {

			boolean sawPlus = false;
			boolean sawMinus = false;

			while (values.hasNext()) {
				String tag = values.next().toString();
				sawPlus |= tag.equals("F+");
				sawMinus |= tag.equals("F-");
			}

			if (sawPlus && sawMinus) {
				output.collect(key, new WorkflowRelation(key.getEvent1(), key.getEvent2(), WorkflowRelation.WfRType.PARALLEL));
			} else if (sawPlus) {
				output.collect(key, new WorkflowRelation(key.getEvent1(), key.getEvent2(), WorkflowRelation.WfRType.CAUSAL));
			} else if (sawMinus) {
				// Only the "-" direction was seen: report the relation on the swapped pair.
				output.collect(new EventPair(key.getEvent2(), key.getEvent1()), new WorkflowRelation(key.getEvent2(), key.getEvent1(), WorkflowRelation.WfRType.CAUSAL));
			} else {
				output.collect(key, new WorkflowRelation(key.getEvent1(), key.getEvent2(), WorkflowRelation.WfRType.UNRELATED));
			}
		}
	}

	/**
	 * Stage 3 combiner and reducer: removes duplicate workflow relations for
	 * one event pair.
	 *
	 * Each relation type (CAUSAL, PARALLEL, UNRELATED) that occurs at least
	 * once among the values is re-emitted exactly once for the key, in that
	 * order.
	 */
	public static class Reduce3 extends MapReduceBase implements 
	Reducer<EventPair, WorkflowRelation, EventPair, WorkflowRelation> {

		/**
		 * Collapses the workflow relations of one event pair to their distinct types.
		 *
		 * @param key      the event pair
		 * @param values   workflow relations recorded for the pair
		 * @param output   collector receiving one relation per type seen
		 * @param reporter unused
		 * @throws IOException if the collector fails to write a record
		 */
		public void reduce (EventPair key, Iterator<WorkflowRelation> values,
				OutputCollector<EventPair, WorkflowRelation> output,
				Reporter reporter) throws IOException {

			boolean hasCausal = false;
			boolean hasParallel = false;
			boolean hasUnrelated = false;

			while (values.hasNext()) {
				WorkflowRelation relation = values.next();
				hasCausal |= relation.getType() == WfRType.CAUSAL;
				hasParallel |= relation.getType() == WfRType.PARALLEL;
				hasUnrelated |= relation.getType() == WfRType.UNRELATED;
			}

			if (hasCausal) {
				output.collect(key, new WorkflowRelation(key, WfRType.CAUSAL));
			}
			if (hasParallel) {
				output.collect(key, new WorkflowRelation(key, WfRType.PARALLEL));
			}
			if (hasUnrelated) {
				output.collect(key, new WorkflowRelation(key, WfRType.UNRELATED));
			}
		}
	}

	/**
	 * Runs the three MapReduce stages one after the other. Each stage reads
	 * the directory written by the previous one.
	 *
	 * Stage 1 (Map1/Reduce1) reads text input and writes follows relations per
	 * event pair. Stage 2 (Combine2/Reduce2) converts them into workflow
	 * relations. Stage 3 (Reduce3 as combiner and reducer) removes duplicates
	 * and writes the final result as text using a single reduce task.
	 *
	 * @param args four paths: args[0] the input, args[1] the stage 1 output
	 *             (stage 2 input), args[2] the stage 2 output (stage 3 input),
	 *             args[3] the final output
	 * @throws Exception if a job cannot be submitted or does not complete
	 *                   successfully; later stages are then not started
	 */
	public static void main(String[] args) throws Exception {
		if (args.length < 4) {
			System.err.println("Usage: alpha <input> <stage1-output> <stage2-output> <final-output>");
			System.exit(2);
		}

		JobConf conf1 = new JobConf(alpha.class);
		conf1.setJobName("AlphaStage1");

		conf1.setMapOutputKeyClass(CaseID.class);
		conf1.setMapOutputValueClass(EventTimestampPair.class);
		conf1.setOutputKeyClass(EventPair.class);
		conf1.setOutputValueClass(FollowsRelation.class);

		conf1.setMapperClass(Map1.class);
		conf1.setReducerClass(Reduce1.class);

		conf1.setInputFormat(TextInputFormat.class);
		conf1.setOutputFormat(SequenceFileOutputFormat.class);
		FileInputFormat.setInputPaths(conf1, new Path(args[0]));
		FileOutputFormat.setOutputPath(conf1, new Path(args[1]));
		
//		FileOutputFormat.setCompressOutput(conf1, true);
//		conf1.setCompressMapOutput(true);
//	    conf1.setMapOutputCompressorClass(GzipCodec.class);

		JobClient.runJob(conf1);

// Phase 2 follows
		JobConf conf2 = new JobConf(alpha.class);
		conf2.setJobName("AlphaStage2");

		conf2.setMapOutputKeyClass(EventPair.class);
		conf2.setMapOutputValueClass(FollowsRelation.class);
		conf2.setOutputKeyClass(EventPair.class);
		conf2.setOutputValueClass(WorkflowRelation.class);

		// No mapper class is set for this stage; the job's default mapper is used.
		conf2.setCombinerClass(Combine2.class);
		conf2.setReducerClass(Reduce2.class);

		conf2.setInputFormat(SequenceFileInputFormat.class);
		conf2.setOutputFormat(SequenceFileOutputFormat.class);
		FileInputFormat.setInputPaths(conf2, new Path(args[1]));
		FileOutputFormat.setOutputPath(conf2, new Path(args[2]));

		JobClient.runJob(conf2);
		
// Phase 3 follows
		JobConf conf3 = new JobConf(alpha.class);
		conf3.setJobName("AlphaStage3");
		
		// Map output and job output types are declared explicitly, as in stages 1 and 2.
		conf3.setMapOutputKeyClass(EventPair.class);
		conf3.setMapOutputValueClass(WorkflowRelation.class);
		conf3.setOutputKeyClass(EventPair.class);
		conf3.setOutputValueClass(WorkflowRelation.class);

		// No mapper class is set for this stage; the job's default mapper is used.
		conf3.setCombinerClass(Reduce3.class);
		conf3.setReducerClass(Reduce3.class);
		
		conf3.setInputFormat(SequenceFileInputFormat.class);
		conf3.setOutputFormat(TextOutputFormat.class);
		FileInputFormat.setInputPaths(conf3, new Path(args[2]));
		FileOutputFormat.setOutputPath(conf3, new Path(args[3]));
		// A single reduce task so the final result lands in one output file.
		conf3.setNumReduceTasks(1);
		
		JobClient.runJob(conf3);
	}
}
