/*
 * Created on Dec 27, 2006
 *
 */

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;

/**
 * Converts a text file of {@code field_name: data} lines into a tab-separated
 * spreadsheet.
 *
 * <p>Input format, as implemented below:
 * <ul>
 *   <li>A line whose text before the first colon contains no space character
 *       starts a new field; everything after that colon is the field's data.</li>
 *   <li>Any other non-blank line is a continuation and is appended (separated
 *       by a single space) to the data of the field currently being read.</li>
 *   <li>A line whose first character is a form feed ({@code '\f'}) ends the
 *       current record; the rest of that line is ignored.</li>
 *   <li>Blank lines are skipped.</li>
 * </ul>
 *
 * <p>The output has one header row listing every field name in order of first
 * appearance, followed by one row per record.
 */
public class FieldsToSpreadSheet {

    /**
     * Extracts the field name from a line of the form {@code name: data}.
     *
     * @param s the line to inspect; must not be {@code null}
     * @return the trimmed text before the first colon, or {@code null} when the
     *         line has no colon or a space character occurs before the colon
     */
    static String fieldNameOf(String s) {
        int colonAt = s.indexOf(':');
        if (colonAt == -1) {
            return null;
        }
        int spaceAt = s.indexOf(' ');
        if (spaceAt != -1 && spaceAt < colonAt) {
            // A space before the colon means this is ordinary text, not a field name.
            return null;
        }
        return s.substring(0, colonAt).trim();
    }

    /**
     * Extracts the data part of a line of the form {@code name: data}.
     *
     * @param s the line to inspect; must not be {@code null}
     * @return the trimmed text after the first colon, or {@code null} when the
     *         line has no colon
     */
    static String dataOf(String s) {
        int colonAt = s.indexOf(':');
        if (colonAt == -1) {
            return null;
        }
        return s.substring(colonAt + 1).trim();
    }

    /**
     * Reads lines until one is found that is either non-blank or an
     * end-of-record marker.
     *
     * @param r the reader to pull lines from
     * @return the next significant line, or {@code null} at end of input
     * @throws IOException if reading fails
     */
    static String readNonEmptyLine(BufferedReader r) throws IOException {
        String s = r.readLine();
        // trim() also strips the form feed, so the end-of-record marker has to
        // be tested explicitly or it would be skipped as a blank line.
        while (s != null && s.trim().length() == 0 && !endOfRecord(s)) {
            s = r.readLine();
        }
        return s;
    }

    /**
     * Tells whether a line is the end-of-record marker.
     *
     * @param s the line to inspect; must not be {@code null}
     * @return {@code true} when the first character of the line is a form feed
     */
    private static boolean endOfRecord(String s) {
        return s.length() > 0 && s.charAt(0) == '\f';
    }

    /**
     * Removes tabs from data (to avoid confusing the tab-separated form).
     *
     * @param s the data to clean; must not be {@code null}
     * @return {@code s} with every tab replaced by a single space
     */
    static String tabFree(String s) {
        return s.replace('\t', ' ');
    }

    /**
     * Reads one record: a series of {@code field_name:} prefixed lines up to
     * the next end-of-record marker (a line starting with a form feed) or the
     * end of input.
     *
     * <p>A line that does not start with a space-free field name is treated as
     * a continuation of the field currently being read. Such lines appearing
     * before the first field name of the record have no field to belong to and
     * are ignored. If a field name occurs more than once in a record, the last
     * occurrence wins.
     *
     * @param r          the reader positioned at the start of a record
     * @param fields     receives, in order of first appearance, each field name
     *                   not already present in {@code fieldsSeen}
     * @param fieldsSeen the set of field names encountered so far; updated
     * @return a map from field name to tab-free data; empty when the record
     *         contains no fields (for example two consecutive end-of-record
     *         markers); {@code null} when the input is exhausted
     * @throws IOException if reading fails
     */
    static Map<String, String> readRecord(BufferedReader r,
            List<String> fields,
            Set<String> fieldsSeen) throws IOException {
        String s = readNonEmptyLine(r);
        if (s == null) {
            return null;
        }

        Map<String, String> m = new HashMap<String, String>();
        String f = null;            // name of the field being accumulated
        StringBuilder data = null;  // its data so far; non-null whenever f is
        while (s != null && !endOfRecord(s)) {
            String nf = fieldNameOf(s);
            if (nf != null) {
                // A new field starts: flush the one collected so far.
                storeField(m, f, data, fields, fieldsSeen);
                f = nf;
                data = new StringBuilder(dataOf(s));
            } else if (f != null) {
                data.append(' ').append(s.trim());
            }
            // else: text before the first field name of the record; ignored.
            s = readNonEmptyLine(r);
        }
        storeField(m, f, data, fields, fieldsSeen);
        return m;
    }

    /**
     * Stores a completed field in the record and registers its name if it has
     * not been seen before. Does nothing when {@code f} is {@code null}, i.e.
     * when no field has been started yet.
     */
    private static void storeField(Map<String, String> m, String f, StringBuilder data,
            List<String> fields, Set<String> fieldsSeen) {
        if (f == null) {
            return;
        }
        if (fieldsSeen.add(f)) {
            fields.add(f);
        }
        m.put(f, tabFree(data.toString()));
    }

    /**
     * Reads every record of a file. Records that contain no fields are
     * skipped. The file is decoded by {@link FileReader}, and is closed before
     * this method returns, whether normally or with an exception.
     *
     * @param file_name  path of the file to read
     * @param fields     receives the field names in order of first appearance
     * @param fieldsSeen the set of field names encountered so far; updated
     * @return the records, in file order
     * @throws IOException if the file cannot be opened or read
     */
    static List<Map<String, String>> readFile(String file_name, ArrayList<String> fields, Set<String> fieldsSeen) throws IOException {
        List<Map<String, String>> a = new ArrayList<Map<String, String>>();
        BufferedReader r = new BufferedReader(new FileReader(file_name));
        try {
            Map<String, String> m = readRecord(r, fields, fieldsSeen);
            while (m != null) {
                if (m.size() > 0) {
                    a.add(m);
                }
                m = readRecord(r, fields, fieldsSeen);
            }
        } finally {
            r.close();
        }
        return a;
    }

    /**
     * Command-line entry point: {@code java FieldsToSpreadSheet fields_file tabs_file}.
     * Prints a usage message and exits with status 1 unless exactly two
     * arguments are given.
     *
     * @param args input file path followed by output file path
     * @throws IOException if either file cannot be read or written
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 2) {
            System.err
                    .println("Usage: java FieldsToSpreadSheet fields_file tabs_file\n"
                            + "Reads field_name-prefixed lines from fields_file, and accumulates them into a tab-separated spreadsheet in tabs_file.\n"
                            + "A field_name is a leading string containing no spaces followed by a colon.  The data is everything between that and the next field name.\n"
                            + "Records are separated by lines beginning with a form feed."
                            );
            System.exit(1);
        }
        ArrayList<String> fields = new ArrayList<String>();
        HashSet<String> fieldset = new HashSet<String>();

        List<Map<String, String>> a = readFile(args[0], fields, fieldset);
        BufferedWriter result = new BufferedWriter(new FileWriter(args[1]));
        try {
            writeRecords(result, a, fields);
        } finally {
            // Close (and thereby flush) the output even when writing fails.
            result.close();
        }
    }

    /**
     * Writes the header row followed by one tab-separated row per record.
     * A field missing from a record is written as an empty cell.
     *
     * @param result the writer to emit rows to; not closed by this method
     * @param a      the records to write
     * @param fields the column names, in output order
     * @throws IOException if writing fails
     */
    private static void writeRecords(BufferedWriter result, List<Map<String, String>> a, ArrayList<String> fields) throws IOException {
        String sep = "";
        for (String f : fields) {
            result.write(sep);
            result.write(f);
            sep = "\t";
        }
        result.newLine();

        for (Map<String, String> m : a) {
            sep = "";
            for (String f : fields) {
                result.write(sep);
                String r = m.get(f);
                if (r == null) {
                    r = "";
                }
                result.write(r.trim());
                sep = "\t";
            }
            result.newLine();
        }
    }
}
