package de.up.ling.irtg.script;

import com.beust.jcommander.IParameterValidator;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.ParameterException;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import de.saar.basic.StringTools;
import de.up.ling.irtg.InterpretedTreeAutomaton;
import de.up.ling.irtg.algebra.Algebra;
import de.up.ling.irtg.algebra.BinarizingTreeWithAritiesAlgebra;
import de.up.ling.irtg.algebra.StringAlgebra;
import de.up.ling.irtg.algebra.TreeWithAritiesAlgebra;
import de.up.ling.irtg.automata.ConcreteTreeAutomaton;
import de.up.ling.irtg.automata.Rule;
import de.up.ling.irtg.automata.TreeAutomaton;
import de.up.ling.irtg.binarization.BinarizingAlgebraSeed;
import de.up.ling.irtg.binarization.BinaryRuleFactory;
import de.up.ling.irtg.binarization.BkvBinarizer;
import de.up.ling.irtg.binarization.GensymBinaryRuleFactory;
import de.up.ling.irtg.binarization.InsideRuleFactory;
import de.up.ling.irtg.binarization.StringAlgebraSeed;
import de.up.ling.irtg.binarization.XbarRuleFactory;
import de.up.ling.irtg.codec.PtbTreeInputCodec;
import de.up.ling.irtg.codec.PtbTreeOutputCodec;
import de.up.ling.irtg.corpus.AbstractCorpusWriter;
import de.up.ling.irtg.corpus.CorpusConverter;
import de.up.ling.irtg.corpus.CorpusWriter;
import de.up.ling.irtg.signature.Interner;
import de.up.ling.irtg.util.GuiUtils;
import de.up.ling.irtg.util.Util;
import de.up.ling.tree.ParseException;
import de.up.ling.tree.Tree;
import de.up.ling.tree.TreeParser;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import net.didion.jwnl.princeton.file.PrincetonRandomAccessDictionaryFile;
import org.apache.commons.math3.dfp.Dfp;
import org.springframework.jdbc.datasource.init.ScriptUtils;
import org.springframework.transaction.interceptor.RuleBasedTransactionAttribute;

/* loaded from: input_file:de/up/ling/irtg/script/PennTreebankConverter.class */
public class PennTreebankConverter {
    /** Command-line parser; assigned in {@code main} and used by {@code usage} to print the help text. */
    private static JCommander jc;

    /** Algebras of the two interpretations ("string" and "tree") of the grammar being built. */
    private static final Map<String, Algebra> algebras = ImmutableMap.of("string", new StringAlgebra(), "tree", new TreeWithAritiesAlgebra());

    /**
     * The grammar under construction. Not final: {@code convert} replaces it with the
     * binarized grammar when binarization is requested.
     */
    private static InterpretedTreeAutomaton irtg = InterpretedTreeAutomaton.forAlgebras(algebras);

    /** Splits a label into a leading part free of '-' and '=' (group 1) and the annotation that follows it. */
    private static final Pattern ANNOTATION_PATTERN = Pattern.compile("([^\\-=]+)([\\-=].*)");

    /**
     * Label transformation that keeps only the part of a label before its first '-' or '='.
     * Labels that do not match {@link #ANNOTATION_PATTERN} (for instance labels that start
     * with '-') are returned unchanged.
     */
    private static final Function<String, String> STRIP_ANNOTATIONS = label -> {
        Matcher annotated = ANNOTATION_PATTERN.matcher(label);
        if (annotated.matches()) {
            return annotated.group(1);
        }
        return label;
    };

    /**
     * Label transformation that maps labels of empty elements to {@code null} (meaning
     * "drop this node") and leaves all other labels untouched.
     */
    private static final Function<String, String> REMOVE_NONE = label -> {
        // NOTE(review): the prefix constant comes from an unrelated Spring class; the decompiler
        // presumably substituted it for a string literal (likely "-", as in "-NONE-") -- confirm.
        boolean emptyElement = label.startsWith(RuleBasedTransactionAttribute.PREFIX_ROLLBACK_RULE) || label.startsWith("*");
        return emptyElement ? null : label;
    };

    /**
     * JCommander validator for the {@code --binarization-mode} option: accepts exactly the
     * values listed in {@link #allowed} and rejects everything else.
     */
    public static class BinarizationStyleValidator implements IParameterValidator {
        /** The binarization modes the converter understands. */
        private static final List<String> allowed = Lists.newArrayList("complete", "xbar", "inside");

        /**
         * Checks the value supplied for the option.
         *
         * @param str  name of the option being validated (not used)
         * @param str2 value given on the command line
         * @throws ParameterException if {@code str2} is not one of the allowed modes
         */
        @Override // com.beust.jcommander.IParameterValidator
        public void validate(String str, String str2) throws ParameterException {
            if (allowed.contains(str2)) {
                return;
            }
            String message = "Invalid value for argument 'binarizationMode'. Allowed values are: " + allowed;
            throw new ParameterException(message);
        }
    }

    /**
     * Command-line options of the converter, populated by JCommander. Each field's initial
     * value is the default used when the corresponding option is absent.
     */
    public static class CmdLineParameters {

        /** Unnamed arguments: the treebank files to read. */
        @Parameter
        public List<String> inputFiles = new ArrayList<>();

        @Parameter(names = {"--out-corpus", "-oc"}, description = "Filename to which the corpus will be written.")
        public String outCorpusFilename = "out.txt";

        @Parameter(names = {"--out-grammar", "-og"}, description = "Filename to which the grammar will be written.")
        public String outGrammarFilename = "out.irtg";

        @Parameter(names = {"--out-automaton", "-oa"}, description = "Filename to which the tree automaton will be written.")
        public String outAutomatonFilename = "out.auto";

        /** When non-null, the transformed trees are additionally written to this file. */
        @Parameter(names = {"--compress-ptb"}, description = "Also write PTB trees into given file, one tree per line (for evalb).")
        public String compressPtb = null;

        @Parameter(names = {"--strip-annotations"}, description = "Convert NP-SBJ to NP etc.")
        public boolean stripAnnotations = false;

        @Parameter(names = {"--remove-none"}, description = "Remove empty elements such as -NONE-.")
        public boolean removeNone = false;

        @Parameter(names = {"--add-top"}, description = "Add a TOP symbol on top of every parse tree.")
        public boolean addTop = false;

        @Parameter(names = {"--pos"}, description = "Remove all leaves, yielding strings of POS tags.")
        public boolean removeLeaves = false;

        @Parameter(names = {"--binarize"}, description = "Binarize the output grammar.")
        public boolean binarize = false;

        @Parameter(names = {"--binarization-mode"}, description = "Binarization mode (complete/xbar/inside).", validateWith = {BinarizationStyleValidator.class})
        public String binarizationMode = "complete";

        @Parameter(names = {"--verbose"}, description = "Print some debugging output.")
        public boolean verbose = false;

        @Parameter(names = {"--help"}, help = true, description = "Prints usage information.")
        private boolean help;

        // NOTE(review): the default is taken from an unrelated commons-math constant; the
        // decompiler presumably substituted it for an integer literal -- confirm the value.
        @Parameter(names = {"--len"}, description = "Maximum length of an input")
        public int maxLen = Dfp.RADIX;

        /** Instances are only created by the enclosing converter. */
        private CmdLineParameters() {
        }

        /**
         * Opens a corpus writer on {@link #outCorpusFilename} whose header comment records
         * the conversion date and the command-line arguments.
         *
         * @param strArr the raw command-line arguments, echoed into the corpus header
         * @return a writer for the converted corpus
         * @throws IOException if the output file cannot be opened for writing
         */
        AbstractCorpusWriter corpusWriterFromFilename(String[] strArr) throws IOException {
            String header = "Converted on " + new Date().toString() + "\nArgs = " + Joiner.on(" ").join(strArr);
            FileWriter target = new FileWriter(this.outCorpusFilename);
            return new CorpusWriter(PennTreebankConverter.irtg, header, "/// ", target);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:de/up/ling/irtg/script/PennTreebankConverter$DerivationTreeMaker.class */
    /**
     * Turns a tree into a derivation tree over rule labels. As a side effect, every call
     * adds the rules it encounters to the automaton of the shared {@code irtg} and adds
     * the matching images to the homomorphisms of its "string" and "tree" interpretations.
     */
    public static class DerivationTreeMaker implements Function<Tree<String>, Tree<Integer>> {
        // Assigns a number to every distinct (parent label, child labels) combination;
        // that number becomes part of the rule label.
        Interner<PtbRule> seenRules = new Interner<>();
        // The automaton of the shared grammar, to which rules and final states are added.
        private ConcreteTreeAutomaton<String> auto = (ConcreteTreeAutomaton) PennTreebankConverter.irtg.getAutomaton();
        // When true, nodes without children do not get a rule of their own; they are
        // embedded verbatim in the homomorphic image of their parent's rule instead.
        private boolean skipLeaves;

        /**
         * @param z whether nodes without children are skipped (see {@code skipLeaves})
         */
        public DerivationTreeMaker(boolean z) {
            this.skipLeaves = z;
        }

        /**
         * Processes {@code tree} bottom-up, registering one rule per visited node, and
         * returns the result of passing the tree of rule labels to the automaton
         * signature's {@code addAllSymbols}.
         *
         * @param tree the tree to convert
         * @return the derivation tree as returned by {@code addAllSymbols}
         */
        @Override // java.util.function.Function
        public Tree<Integer> apply(Tree<String> tree) {
            // In the visitor below, the first argument is the node being visited and the
            // second is the list of results already computed for its children.
            return PennTreebankConverter.irtg.getAutomaton().getSignature().addAllSymbols((Tree) tree.dfs((tree2, list) -> {
                Tree create;
                // A skipped leaf is signalled to its parent by a null result.
                if (this.skipLeaves && tree2.getChildren().isEmpty()) {
                    return null;
                }
                // Rule label = prefix constant + number interned for (node label, child labels).
                // NOTE(review): the prefix constant comes from an unrelated JWNL class; the
                // decompiler presumably substituted it for a string literal -- confirm its value.
                String str = PrincetonRandomAccessDictionaryFile.READ_ONLY + this.seenRules.addObject(new PtbRule((String) tree2.getLabel(), Util.mapToList(tree2.getChildren(), tree2 -> {
                    return (String) tree2.getLabel();
                })));
                // arrayList:  labels of the children that produced a result (child states of the rule)
                // arrayList2: results of those children (children of the returned derivation tree)
                // arrayList3: children of the homomorphic image (variables or embedded subtrees)
                ArrayList arrayList = new ArrayList();
                ArrayList arrayList2 = new ArrayList();
                ArrayList arrayList3 = new ArrayList();
                // Number of the next variable; variables are named ?1, ?2, ...
                int i = 1;
                for (int i2 = 0; i2 < list.size(); i2++) {
                    Tree tree3 = (Tree) list.get(i2);
                    Tree tree4 = (Tree) tree2.getChildren().get(i2);
                    if (tree3 == null) {
                        // The child was skipped: embed the original child subtree as is.
                        create = tree4;
                    } else {
                        // The child has a rule of its own: refer to it through a fresh variable.
                        int i3 = i;
                        i++;
                        create = Tree.create("?" + i3, new Tree[0]);
                    }
                    arrayList3.add(create);
                    if (tree3 != null) {
                        arrayList.add(tree4.getLabel());
                        arrayList2.add(tree3);
                    }
                }
                // The rule's parent state is the node label; its child states are the labels
                // of the non-skipped children.
                Rule createRule = this.auto.createRule((TreeAutomaton) tree2.getLabel(), str, (List<TreeAutomaton>) arrayList);
                this.auto.addRule(createRule);
                // String image: the node label itself when the node has no children, otherwise
                // a "conc<k>" node over the k image children.
                PennTreebankConverter.irtg.getInterpretation("string").getHomomorphism().add(str, Tree.create(list.isEmpty() ? (String) tree2.getLabel() : "conc" + arrayList3.size(), arrayList3));
                // Tree image: the node label over the same children, passed through addArities.
                PennTreebankConverter.irtg.getInterpretation("tree").getHomomorphism().add(str, TreeWithAritiesAlgebra.addArities(Tree.create(tree2.getLabel(), arrayList3)));
                // NOTE(review): the result of this lookup is discarded; it looks like a debugging
                // leftover -- confirm that get() has no side effect that is relied upon.
                PennTreebankConverter.irtg.getInterpretation("string").getHomomorphism().get(createRule.getLabel());
                // The state of the root node of the input tree becomes a final state.
                if (tree2 == tree) {
                    this.auto.addFinalState(createRule.getParent());
                }
                return Tree.create(str, arrayList2);
            }));
        }
    }

    /**
     * Value object for a context-free rule read off a treebank tree: a parent label and
     * the labels of its children. Two rules are equal when both parts are equal.
     */
    private static class PtbRule {
        String lhs;
        List<String> rhs;

        /**
         * @param str  label of the parent node
         * @param list labels of the child nodes, in order
         */
        public PtbRule(String str, List<String> list) {
            lhs = str;
            rhs = list;
        }

        /** Combines the hashes of both parts; null parts contribute 0. */
        @Override
        public int hashCode() {
            int hash = 47 * 7;
            hash += Objects.hashCode(lhs);
            hash *= 47;
            hash += Objects.hashCode(rhs);
            return hash;
        }

        /** Equal only to another {@code PtbRule} of the same class with equal parent and child labels. */
        @Override
        public boolean equals(Object obj) {
            if (obj != null && getClass() == obj.getClass()) {
                PtbRule other = (PtbRule) obj;
                if (!Objects.equals(lhs, other.lhs)) {
                    return false;
                }
                return Objects.equals(rhs, other.rhs);
            }
            return false;
        }

        /** Renders the rule as {@code lhs -> [child1, child2, ...]}. */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder();
            text.append(lhs).append(" -> ").append(rhs);
            return text.toString();
        }
    }

    /**
     * Command-line entry point: parses the arguments, prints the usage text when help is
     * requested or no input file is given, and otherwise runs the conversion.
     *
     * @param strArr the command-line arguments
     * @throws Exception if argument parsing or the conversion fails
     */
    public static void main(String[] strArr) throws Exception {
        final CmdLineParameters options = new CmdLineParameters();
        jc = new JCommander(options, strArr);

        if (options.help) {
            usage(null);
        }

        boolean nothingToRead = options.inputFiles.isEmpty();
        if (nothingToRead) {
            usage("No input files specified.");
        }

        AbstractCorpusWriter corpusWriter = options.corpusWriterFromFilename(strArr);
        convert(corpusWriter, options);
    }

    /**
     * Runs the whole conversion:
     * <ol>
     *   <li>reads all input files, applies the requested tree transformations and writes
     *       the converted instances through {@code abstractCorpusWriter};</li>
     *   <li>re-reads the written corpus file to estimate rule weights by maximum likelihood;</li>
     *   <li>optionally binarizes the grammar;</li>
     *   <li>writes the grammar and dumps its automaton to the configured files.</li>
     * </ol>
     * Progress is reported on {@code System.err}. Failures while reading or converting the
     * input terminate the JVM with exit status 3.
     *
     * @param abstractCorpusWriter writer receiving the converted corpus; closed once all input is read
     * @param cmdLineParameters    the conversion options
     * @throws Exception if training, binarization or writing the grammar fails
     */
    public static void convert(AbstractCorpusWriter abstractCorpusWriter, CmdLineParameters cmdLineParameters) throws Exception {
        PtbTreeInputCodec codec = new PtbTreeInputCodec();

        // "string" instances are the leaf labels of a tree, "tree" instances the tree itself.
        CorpusConverter<Tree<String>> converter = new CorpusConverter<Tree<String>>(abstractCorpusWriter,
                ImmutableMap.of(
                        "string", (Tree<String> tree) -> tree.getLeafLabels(),
                        "tree", (Tree<String> tree) -> tree));
        // With --pos the words are removed beforehand, so the remaining leaves need rules of their own.
        converter.setDerivationTreeMaker(new DerivationTreeMaker(!cmdLineParameters.removeLeaves));

        final boolean verbose = cmdLineParameters.verbose;
        if (verbose) {
            System.err.println("Input files: " + Joiner.on(" ").join(cmdLineParameters.inputFiles));
            System.err.println("Output grammar: " + cmdLineParameters.outGrammarFilename);
            System.err.println("Output corpus: " + cmdLineParameters.outCorpusFilename);
        }

        if (cmdLineParameters.stripAnnotations) {
            converter.addTransformation(tnl(STRIP_ANNOTATIONS));
            if (verbose) {
                System.err.println("- strip annotations");
            }
        }
        if (cmdLineParameters.removeNone) {
            converter.addTransformation(tnl(REMOVE_NONE));
            if (verbose) {
                System.err.println("- remove NONE");
            }
        }
        if (cmdLineParameters.addTop) {
            converter.addTransformation(tree -> Tree.create("TOP", tree));
            if (verbose) {
                System.err.println("- add TOP");
            }
        }
        if (cmdLineParameters.removeLeaves) {
            converter.addTransformation(makeWordsPos());
            if (verbose) {
                System.err.println("- remove leaves");
            }
        }
        if (verbose) {
            System.err.println();
        }

        withPtbCompressionConsumer(cmdLineParameters, converter,
                sink -> feedInputFiles(cmdLineParameters, codec, abstractCorpusWriter, sink));

        System.err.println("\nEstimate IRTG weights with Maximum Likelihood on new corpus ...");
        // Close the corpus reader even when reading or training fails.
        try (FileReader corpusReader = new FileReader(cmdLineParameters.outCorpusFilename)) {
            irtg.trainML(irtg.readCorpus(corpusReader));
        }
        System.err.println("Done.");

        if (cmdLineParameters.binarize) {
            System.err.println("\nBinarizing IRTG ...");
            Map<String, Algebra> binarizedAlgebras = ImmutableMap.of("string", new StringAlgebra(), "tree", new BinarizingTreeWithAritiesAlgebra());
            BkvBinarizer binarizer = new BkvBinarizer(
                    ImmutableMap.of(
                            "string", new StringAlgebraSeed(irtg.getInterpretation("string").getAlgebra(), binarizedAlgebras.get("string")),
                            "tree", new BinarizingAlgebraSeed(irtg.getInterpretation("tree").getAlgebra(), binarizedAlgebras.get("tree"))),
                    makeRuleFactoryFactory(cmdLineParameters.binarizationMode));
            irtg = GuiUtils.withConsoleProgressBar(60, System.out, listener -> binarizer.binarize(irtg, binarizedAlgebras, listener));
        }

        // try-with-resources flushes and closes the grammar file even if writing fails.
        try (FileWriter grammarWriter = new FileWriter(cmdLineParameters.outGrammarFilename)) {
            grammarWriter.write(irtg.toString());
        }
        irtg.getAutomaton().dumpToFile(cmdLineParameters.outAutomatonFilename);
        System.err.println("Done.");
    }

    /**
     * Reads every tree from the configured input files and hands those whose number of
     * leaves does not exceed {@code maxLen} to {@code sink}; then closes {@code writer} and
     * prints a summary. Any failure is reported on {@code System.err} and terminates the
     * JVM with exit status 3.
     */
    private static void feedInputFiles(CmdLineParameters cmdLineParameters, PtbTreeInputCodec codec, AbstractCorpusWriter writer, Consumer<Tree<String>> sink) {
        try {
            long skipped = 0;
            long seen = 0;
            for (String filename : cmdLineParameters.inputFiles) {
                // Each input stream is closed as soon as its file has been processed.
                try (FileInputStream in = new FileInputStream(filename)) {
                    System.err.println("Processing " + filename + " ...");
                    for (Tree<String> tree : codec.readCorpus(in)) {
                        // Counting before processing keeps the error message 1-based and makes
                        // the final total exact (it was previously reported one too high).
                        seen++;
                        try {
                            if (tree.getLeafLabels().size() <= cmdLineParameters.maxLen) {
                                sink.accept(tree);
                            } else {
                                skipped++;
                            }
                        } catch (Exception e) {
                            System.err.println("Exception while reading instance #" + seen);
                            System.err.println(tree);
                            System.err.println("\nCause:");
                            System.err.println(e);
                            e.printStackTrace(System.err);
                            System.exit(3);
                        }
                    }
                }
            }
            writer.close();
            if (skipped > 0) {
                System.err.println(String.format("Done: %d instances, %d skipped for length.", seen, skipped));
            } else {
                System.err.println(String.format("Done: %d instances.", seen));
            }
        } catch (Exception e) {
            System.err.println(e);
            e.printStackTrace(System.err);
            System.exit(3);
        }
    }

    /**
     * Runs {@code consumer} on {@code corpusConverter}. When a PTB output file is configured
     * ({@code compressPtb} is non-null), an extra consumer is first registered on the
     * converter that writes each tree it receives to that file in PTB notation, followed by
     * a separator; the file is closed after {@code consumer} returns or throws.
     *
     * @param cmdLineParameters options; only {@code compressPtb} is read
     * @param corpusConverter   the converter to run and, optionally, to extend
     * @param consumer          the action that feeds trees into the converter
     * @throws IOException if the PTB output file cannot be opened or closed
     */
    private static void withPtbCompressionConsumer(CmdLineParameters cmdLineParameters, CorpusConverter<Tree<String>> corpusConverter, Consumer<Consumer<Tree<String>>> consumer) throws IOException {
        if (cmdLineParameters.compressPtb == null) {
            consumer.accept(corpusConverter);
            return;
        }

        PtbTreeOutputCodec outputCodec = new PtbTreeOutputCodec();
        // try-with-resources flushes and closes the file even when the callback throws.
        try (FileWriter ptbWriter = new FileWriter(cmdLineParameters.compressPtb)) {
            corpusConverter.addConsumer(tree -> {
                try {
                    ptbWriter.write(outputCodec.asString(tree));
                    // NOTE(review): the separator constant comes from an unrelated Spring class;
                    // the decompiler presumably substituted it for a line-break literal -- confirm.
                    ptbWriter.write(ScriptUtils.FALLBACK_STATEMENT_SEPARATOR);
                } catch (IOException e) {
                    // A write failure aborts the whole run.
                    System.err.println(e);
                    System.exit(2);
                }
            });
            consumer.accept(corpusConverter);
        }
    }

    /**
     * Selects the rule-factory factory for a binarization mode.
     *
     * @param str the binarization mode: {@code "complete"}, {@code "xbar"} or {@code "inside"}
     * @return the factory factory belonging to that mode
     * @throws UnsupportedOperationException if {@code str} is none of the known modes
     * @throws NullPointerException if {@code str} is null
     */
    public static Function<InterpretedTreeAutomaton, BinaryRuleFactory> makeRuleFactoryFactory(String str) {
        switch (str) {
            case "complete":
                return GensymBinaryRuleFactory.createFactoryFactory();
            case "xbar":
                return XbarRuleFactory.createFactoryFactory();
            case "inside":
                return InsideRuleFactory.createFactoryFactory();
            default:
                throw new UnsupportedOperationException("Undefined binarization mode: " + str);
        }
    }

    /**
     * Prints the usage text and terminates the JVM. Does nothing if the command-line
     * parser has not been created yet.
     *
     * @param str an error message to print before the usage text, or {@code null} when
     *            help was requested explicitly; the exit status is 1 with a message and
     *            0 without
     */
    private static void usage(String str) {
        if (jc == null) {
            return;
        }
        boolean isError = str != null;
        if (isError) {
            // Print the message that was passed in rather than a fixed text.
            System.out.println(str);
        }
        jc.setProgramName("java -cp <alto.jar> de.up.ling.irtg.script.PennTreebankConverter <inputfiles>");
        jc.usage();
        System.exit(isError ? 1 : 0);
    }

    /**
     * Builds a transformation that rebuilds a tree bottom-up and replaces every node whose
     * single rebuilt child has no children by a childless node carrying the node's own
     * label. All other nodes keep their label and their rebuilt children.
     *
     * @return the tree transformation
     */
    private static Function<Tree<String>, Tree<String>> makeWordsPos() {
        return tree -> tree.dfs((node, rebuiltChildren) -> {
            boolean dominatesSingleLeaf = rebuiltChildren.size() == 1 && rebuiltChildren.get(0).getChildren().isEmpty();
            if (dominatesSingleLeaf) {
                // Drop the only child so that this node itself becomes a leaf.
                return Tree.create(node.getLabel());
            }
            return Tree.create(node.getLabel(), rebuiltChildren);
        });
    }

    /**
     * Lifts a label transformation to a tree transformation that is applied to every node,
     * leaves included. A node whose label is mapped to {@code null} is removed together
     * with its subtree; removed children are left out of their parent's child list.
     *
     * @param function maps a node label to its new label, or to {@code null} to drop the node
     * @return the tree transformation; it yields {@code null} if the root itself is dropped
     */
    private static Function<Tree<String>, Tree<String>> t(Function<String, String> function) {
        return tree -> tree.dfs((node, childResults) -> {
            String newLabel = function.apply(node.getLabel());
            if (newLabel == null) {
                return null;
            }
            List<Tree<String>> keptChildren = childResults.stream()
                    .filter(Objects::nonNull)
                    .collect(Collectors.toList());
            return Tree.create(newLabel, keptChildren);
        });
    }

    /**
     * Lifts a label transformation to a tree transformation that is applied to inner nodes
     * only:
     * <ul>
     *   <li>a node without children in the input tree is returned unchanged;</li>
     *   <li>an inner node none of whose children survive is removed;</li>
     *   <li>any other inner node is relabelled with {@code function}, or removed together
     *       with its subtree when {@code function} returns {@code null}.</li>
     * </ul>
     *
     * @param function maps an inner-node label to its new label, or to {@code null} to drop the node
     * @return the tree transformation; it yields {@code null} if the root itself is dropped
     */
    private static Function<Tree<String>, Tree<String>> tnl(Function<String, String> function) {
        return tree -> tree.dfs((node, childResults) -> {
            List<Tree<String>> keptChildren = childResults.stream()
                    .filter(Objects::nonNull)
                    .collect(Collectors.toList());
            if (keptChildren.isEmpty()) {
                // Original leaves stay as they are; an inner node that lost all children disappears.
                return node.getChildren().isEmpty() ? node : null;
            }
            String newLabel = function.apply(node.getLabel());
            if (newLabel == null) {
                return null;
            }
            return Tree.create(newLabel, keptChildren);
        });
    }

    /**
     * Returns the grammar held by this converter in its current state.
     *
     * @return the shared interpreted tree automaton
     */
    public static InterpretedTreeAutomaton getIrtg() {
        return PennTreebankConverter.irtg;
    }

    /**
     * Parses a tree from {@code reader} and derives a child-to-parent label mapping from
     * it. For every node other than the root, the value computed for each of its children
     * (the child's label) is mapped to the node's label, and the child's label with the
     * suffix {@code "-bar"} is mapped to the node's label with the same suffix. The root's
     * label is never used as a mapping target.
     *
     * @param reader source of the textual tree; it is read completely but not closed here
     * @return the label mapping
     * @throws IOException    if reading fails
     * @throws ParseException if the text is not a well-formed tree
     */
    private static Map<String, String> readInCtfMapping(Reader reader) throws IOException, ParseException {
        String text = StringTools.slurp(reader);
        Tree<String> root = TreeParser.parse(text);
        Map<String, String> childToParent = new HashMap<>();

        root.dfs((Tree<String> node, List<String> childLabels) -> {
            if (node == root) {
                return null;
            }
            String parentLabel = node.getLabel();
            for (String childLabel : childLabels) {
                childToParent.put(childLabel, parentLabel);
                childToParent.put(childLabel + "-bar", parentLabel + "-bar");
            }
            return parentLabel;
        });

        return childToParent;
    }
}
