package de.up.ling.irtg.script;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableMap;
import de.up.ling.irtg.Interpretation;
import de.up.ling.irtg.InterpretedTreeAutomaton;
import de.up.ling.irtg.algebra.Algebra;
import de.up.ling.irtg.algebra.BinarizingTagTreeAlgebra;
import de.up.ling.irtg.algebra.ParserException;
import de.up.ling.irtg.algebra.TagStringAlgebra;
import de.up.ling.irtg.binarization.BinarizingAlgebraSeed;
import de.up.ling.irtg.binarization.BkvBinarizer;
import de.up.ling.irtg.binarization.IdentitySeed;
import de.up.ling.irtg.codec.tag.ChenTagInputCodec;
import de.up.ling.irtg.codec.tag.ElementaryTree;
import de.up.ling.irtg.codec.tag.TagGrammar;
import de.up.ling.irtg.corpus.AbstractCorpusWriter;
import de.up.ling.irtg.corpus.Corpus;
import de.up.ling.irtg.corpus.CorpusWriter;
import de.up.ling.irtg.corpus.Instance;
import de.up.ling.irtg.script.PennTreebankConverter;
import de.up.ling.irtg.util.GuiUtils;
import de.up.ling.tree.Tree;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.math3.dfp.Dfp;

/* loaded from: input_file:de/up/ling/irtg/script/ChenTagTreebankConverter.class */
/**
 * Command-line tool that reads an unlexicalized TAG grammar in the format
 * understood by {@link ChenTagInputCodec}, lexicalizes it from a corpus file,
 * converts it to an IRTG, writes an annotated corpus, trains the IRTG by
 * maximum likelihood on that corpus, optionally binarizes it, and finally
 * writes the resulting grammar to a file.
 *
 * <p>Besides the files named on the command line, the tool always writes the
 * debugging dumps {@code tagg.txt} and {@code lexicalized-tagg.txt} into the
 * current working directory.
 */
public class ChenTagTreebankConverter {
    /** Command-line parser; assigned in {@link #main} and read by {@link #usage}. */
    private static JCommander jc;

    /** The IRTG being built; shared with {@link CmdLineParameters#corpusWriterFromFilename}. */
    private static InterpretedTreeAutomaton irtg;

    /** Holder for the command-line options, populated by JCommander. */
    private static class CmdLineParameters {

        /** Unnamed command-line arguments. Only the first entry is read by {@code main}. */
        @Parameter
        public List<String> inputFiles;

        @Parameter(names = {"--out-corpus", "-oc"}, description = "Filename to which the corpus will be written.")
        public String outCorpusFilename;

        @Parameter(names = {"--out-grammar", "-og"}, description = "Filename to which the grammar will be written.")
        public String outGrammarFilename;

        @Parameter(names = {"--in-grammar", "-ig"}, description = "Filename of the TAG grammar we will read.", required = true)
        public String inGrammarFilename;

        @Parameter(names = {"--out-automaton", "-oa"}, description = "Filename to which the tree automaton will be written.")
        public String outAutomatonFilename;

        @Parameter(names = {"--lowercase"}, description = "Convert all words to lowercase.")
        public boolean lowercase;

        @Parameter(names = {"--binarize"}, description = "Binarize the output grammar.")
        public boolean binarize;

        @Parameter(names = {"--binarization-mode"}, description = "Binarization mode (complete/xbar/inside).", validateWith = PennTreebankConverter.BinarizationStyleValidator.class)
        public String binarizationMode;

        @Parameter(names = {"--verbose"}, description = "Print some debugging output.")
        public boolean verbose;

        @Parameter(names = {"--help"}, help = true, description = "Prints usage information.")
        private boolean help;

        @Parameter(names = {"--len"}, description = "Maximum length of an input")
        public int maxLen;

        /** Initializes every option to its default value. */
        private CmdLineParameters() {
            this.inputFiles = new ArrayList<>();
            this.outCorpusFilename = "out.txt";
            this.outGrammarFilename = "out.irtg";
            this.inGrammarFilename = null;
            this.outAutomatonFilename = "out.auto";
            this.lowercase = false;
            this.binarize = false;
            this.binarizationMode = "complete";
            this.verbose = false;
            // Plain literal instead of the unrelated commons-math constant Dfp.RADIX (same value).
            this.maxLen = 10000;
        }

        /**
         * Creates a corpus writer for the current IRTG that writes to
         * {@link #outCorpusFilename}.
         *
         * @param strArr the raw command-line arguments; they are joined with
         *               spaces and embedded, together with the current date,
         *               in the text handed to the writer
         * @return a new {@link CorpusWriter} backed by a {@link FileWriter}
         * @throws IOException if the output file cannot be opened
         */
        AbstractCorpusWriter corpusWriterFromFilename(String[] strArr) throws IOException {
            String header = "Converted on " + new Date().toString() + "\nArgs = " + Joiner.on(" ").join(strArr);
            return new CorpusWriter(ChenTagTreebankConverter.irtg, header, "/// ", new FileWriter(this.outCorpusFilename));
        }
    }

    /**
     * Entry point: runs the whole conversion pipeline described on the class.
     *
     * @param strArr command-line arguments, parsed into {@link CmdLineParameters}
     * @throws FileNotFoundException if an input or output file cannot be opened
     * @throws IOException           if reading or writing a file fails
     * @throws ParserException       declared for the grammar/corpus parsing steps
     * @throws Exception             for any other failure in the underlying library calls
     */
    public static void main(String[] strArr) throws FileNotFoundException, IOException, ParserException, Exception {
        CmdLineParameters params = new CmdLineParameters();
        jc = new JCommander(params, strArr);

        // usage(...) terminates the JVM, so execution only continues with valid arguments.
        if (params.help) {
            usage(null);
        }
        if (params.inputFiles.isEmpty()) {
            usage("No input files specified.");
        }

        // Read the unlexicalized grammar; the reader is closed as soon as parsing is done.
        ChenTagInputCodec codec = new ChenTagInputCodec();
        TagGrammar grammar;
        try (FileReader grammarReader = new FileReader(params.inGrammarFilename)) {
            grammar = codec.readUnlexicalizedGrammar(grammarReader);
        }
        grammar.setTracePredicate(label -> label.contains("-NONE-"));

        // Lexicalize from the first input file; any further input files are ignored.
        List<Tree<String>> derivationTrees;
        try (FileReader corpusReader = new FileReader(params.inputFiles.get(0))) {
            derivationTrees = codec.lexicalizeFromCorpus(grammar, corpusReader);
        }

        // Debugging dumps of the grammar and of the lexicalized elementary trees per word.
        writeToFile("tagg.txt", grammar);
        try (PrintWriter lexOut = new PrintWriter("lexicalized-tagg.txt")) {
            lexOut.println("\n\n");
            for (String word : grammar.getWords()) {
                lexOut.println("\nword: " + word + "\n==================\n");
                for (ElementaryTree elementaryTree : grammar.lexicalizeElementaryTrees(word)) {
                    lexOut.println("   " + elementaryTree);
                }
            }
        }

        irtg = grammar.toIrtg();
        System.err.println("\nMaximum likelihood estimation ...");

        // Build a corpus with one instance per derivation tree, interpreted in both interpretations.
        Corpus corpus = new Corpus();
        Interpretation stringInterpretation = irtg.getInterpretation("string");
        Interpretation treeInterpretation = irtg.getInterpretation("tree");
        for (Tree<String> derivationTree : derivationTrees) {
            Instance instance = new Instance();
            instance.setDerivationTree(irtg.getAutomaton().getSignature().mapSymbolsToIds(derivationTree));
            instance.setInputObjects(ImmutableMap.of(
                    "string", stringInterpretation.interpret(derivationTree),
                    "tree", treeInterpretation.interpret(derivationTree)));
            corpus.addInstance(instance);
        }

        // The corpus writer opens the output file itself, so no second handle on the same file is created here.
        // NOTE(review): the writer is never closed explicitly; confirm that writeCorpus flushes/closes the
        // underlying FileWriter, otherwise buffered output could be lost.
        AbstractCorpusWriter corpusWriter = params.corpusWriterFromFilename(strArr);
        corpusWriter.setAnnotated(true);
        corpusWriter.writeCorpus(corpus);

        irtg.trainML(corpus);

        if (params.binarize) {
            System.err.println("\nBinarizing IRTG ...");
            // Typed as Algebra so that both algebra implementations fit without casting one to the other.
            ImmutableMap<String, Algebra> newAlgebras = ImmutableMap.<String, Algebra>of(
                    "string", new TagStringAlgebra(),
                    "tree", new BinarizingTagTreeAlgebra());
            // The seed map's value type is inferred from the BkvBinarizer constructor parameter.
            BkvBinarizer binarizer = new BkvBinarizer(
                    ImmutableMap.of(
                            "string", new IdentitySeed(irtg.getInterpretation("string").getAlgebra(), newAlgebras.get("string")),
                            "tree", new BinarizingAlgebraSeed(irtg.getInterpretation("tree").getAlgebra(), newAlgebras.get("tree"))),
                    PennTreebankConverter.makeRuleFactoryFactory(params.binarizationMode));
            irtg = (InterpretedTreeAutomaton) GuiUtils.withConsoleProgressBar(60, System.out, progressListener -> {
                return binarizer.binarize(irtg, newAlgebras, progressListener);
            });
        }

        writeToFile(params.outGrammarFilename, irtg);
    }

    /**
     * Writes the string representation of {@code content}, followed by a line
     * break, to the given file, replacing any previous contents.
     *
     * @param filename path of the file to write
     * @param content  object whose string form is written
     * @throws FileNotFoundException if the file cannot be opened for writing
     */
    private static void writeToFile(String filename, Object content) throws FileNotFoundException {
        try (PrintWriter out = new PrintWriter(filename)) {
            out.println(content);
        }
    }

    /**
     * Prints usage information and terminates the JVM. Does nothing if the
     * command-line parser has not been created yet.
     *
     * @param str error message to print before the usage text, or {@code null}
     *            if usage was requested explicitly; the exit status is 1 when
     *            a message is given and 0 otherwise
     */
    private static void usage(String str) {
        if (jc == null) {
            return;
        }
        if (str != null) {
            // Print the message that was actually passed in rather than a fixed text.
            System.out.println(str);
        }
        jc.setProgramName("java -cp <alto.jar> de.up.ling.irtg.script.ChenTagTreebankConverter <inputfiles>");
        jc.usage();
        System.exit(str != null ? 1 : 0);
    }
}
