package de.up.ling.irtg.script;

import com.lowagie.text.html.HtmlTags;
import de.saar.basic.StringTools;
import de.up.ling.irtg.Interpretation;
import de.up.ling.irtg.InterpretedTreeAutomaton;
import de.up.ling.irtg.algebra.ParserException;
import de.up.ling.irtg.algebra.StringAlgebra;
import de.up.ling.irtg.algebra.TreeAlgebra;
import de.up.ling.irtg.automata.ConcreteTreeAutomaton;
import de.up.ling.irtg.automata.TreeAutomaton;
import de.up.ling.irtg.corpus.Corpus;
import de.up.ling.irtg.corpus.CorpusWriter;
import de.up.ling.irtg.corpus.Instance;
import de.up.ling.irtg.hom.Homomorphism;
import de.up.ling.irtg.hom.HomomorphismSymbol;
import de.up.ling.irtg.signature.Signature;
import de.up.ling.tree.ParseException;
import de.up.ling.tree.Tree;
import de.up.ling.tree.TreeParser;
import de.up.ling.tree.TreeVisitor;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import net.didion.jwnl.princeton.file.PrincetonRandomAccessDictionaryFile;
import org.springframework.transaction.interceptor.RuleBasedTransactionAttribute;
import org.springframework.util.AntPathMatcher;

/* JADX INFO: Access modifiers changed from: package-private */
@Deprecated
/* loaded from: input_file:de/up/ling/irtg/script/PTBConverter.class */
public class PTBConverter {
    /** Class-wide logger; the constructor raises its level to {@code Level.ALL}. */
    private static final Logger log = Logger.getLogger(PTBConverter.class.getName());
    // Not referenced by name in the visible code; main() uses the same value (15) as its
    // default sentence-length limit -- presumably inlined from this constant by the compiler.
    private static final int TOKEN_SIZE = 15;
    // Not referenced by name in the visible code.
    private static final boolean CONVERT = false;
    // Not referenced by name in the visible code; every PtbTreeAlgebra created by this class
    // is constructed with a literal true.
    public static final boolean PARENT_ANNOTATION = true;
    /** Grammar under construction; created by initGrammar() and serialized by writeGrammar(). */
    private InterpretedTreeAutomaton irtg;
    /** Homomorphism into the string algebra (registered under the interpretation HtmlTags.I). */
    private Homomorphism hStr;
    /** Homomorphism into the PtbTreeAlgebra (registered under the interpretation "ptb"). */
    private Homomorphism hPtb;
    /** Trees whose word count exceeds this limit are dropped by read(). */
    private int maxTerminalsPerSentence;
    /** Receives one Instance per converted tree (filled by convert()). */
    private Corpus corpus = new Corpus();
    /** Trees accepted by read(), in input order. */
    private List<Tree<String>> ptbTrees = new ArrayList();
    /** Maps a rule key (see nodeToRuleString) to the symbol id assigned to that rule. */
    private Map<String, Integer> ruleMap = new HashMap();

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:de/up/ling/irtg/script/PTBConverter$PtbTreeAlgebra.class */
    public static class PtbTreeAlgebra extends TreeAlgebra {
        /** Character sequence that parseFromReader() scans for to find the start of the next tree. */
        private static final String START_SEQUENCE = "( ";
        /** Label prefix of the artificial nodes created by binarize(); evaluate() splices such nodes out again. */
        public static final String LABEL_PREFIX = "ART-";
        /** Number of word leaves created by parseTree() since the last parseFromReader() call reset it. */
        private int numWords;
        /** When true, binarizeAndRelabel() appends "^" plus the parent label to child labels. */
        private boolean useParentAnnotation;
        private static final Logger log = Logger.getLogger(PtbTreeAlgebra.class.getName());
        /** Splits a tag at its first '-' or '='; stripPosTag() keeps only group 1 (the part before it). */
        private static final Pattern STRIP_PATTERN = Pattern.compile("([^-=]+)([-=])(.+)");
        /** Leading non-digit run, then digits, then the rest; evaluate() keeps only group 1. */
        private static final Pattern LABELFX_PATTERN = Pattern.compile("(\\D+)(\\d+)(.*)");
        /** Splits "label^parent" at the last '^' (greedy first group); used by concatLabels(). */
        private static final Pattern CONCAT_PATTERN = Pattern.compile("(.+)(\\^)(.+)");

        /** Creates an algebra with parent annotation switched on (delegates with {@code true}). */
        public PtbTreeAlgebra() {
            this(true);
        }

        /**
         * Creates an algebra with an explicit parent-annotation setting.
         *
         * @param useParentAnnotation whether binarizeAndRelabel() should append "^parent" to child labels
         */
        public PtbTreeAlgebra(boolean useParentAnnotation) {
            this.useParentAnnotation = useParentAnnotation;
        }

        /**
         * @return the label prefix that marks artificial nodes introduced by binarization
         */
        public static String getArtificialLabelPrefix() {
            return PtbTreeAlgebra.LABEL_PREFIX;
        }

        /**
         * @return the number of word leaves counted since the most recent parseFromReader() call
         */
        public int getNumWords() {
            return numWords;
        }

        /**
         * Rebuilds a tree bottom-up, undoing the relabeling done by binarizeAndRelabel():
         * children whose label starts with {@link #LABEL_PREFIX} are replaced by their own
         * children, and every inner node's label is cut back to its leading non-digit run
         * (see LABELFX_PATTERN). Leaf labels are kept unchanged.
         *
         * @param tree the term to evaluate
         * @return the rebuilt tree
         */
        @Override // de.up.ling.irtg.algebra.TreeAlgebra, de.up.ling.irtg.algebra.Algebra
        public Tree<String> evaluate(Tree<String> tree) {
            // The decompiler's cast to TreeVisitor<String, Down, Up> referenced type variables that
            // are not in scope here; the visitor's own type arguments are sufficient.
            return tree.dfs(new TreeVisitor<String, Void, Tree<String>>() {
                @Override // de.up.ling.tree.TreeVisitor
                public Tree<String> combine(Tree<String> node, List<Tree<String>> childResults) {
                    List<Tree<String>> flattened = new ArrayList<>();
                    for (Tree<String> child : childResults) {
                        if (child.getLabel().startsWith(PtbTreeAlgebra.LABEL_PREFIX)) {
                            // artificial binarization node: lift its children into this node
                            flattened.addAll(child.getChildren());
                        } else {
                            flattened.add(child);
                        }
                    }

                    String label = node.getLabel();
                    if (!childResults.isEmpty()) {
                        // inner node: drop the digit suffix and everything after it
                        Matcher matcher = PtbTreeAlgebra.LABELFX_PATTERN.matcher(label);
                        if (matcher.matches()) {
                            label = matcher.group(1);
                        }
                    }
                    return Tree.create(label, flattened);
                }
            });
        }

        /**
         * Decomposes a tree by first running it through binarizeAndRelabel() and then
         * handing the result to the superclass implementation.
         *
         * @param tree the tree to decompose
         * @return the automaton produced by the superclass for the binarized tree
         */
        @Override // de.up.ling.irtg.algebra.TreeAlgebra, de.up.ling.irtg.algebra.Algebra
        public TreeAutomaton decompose(Tree<String> tree) {
            Tree<String> binarized = binarizeAndRelabel(tree);
            return super.decompose(binarized);
        }

        /**
         * Parses a tree from its string form. The bracketed reader-based parser is tried first;
         * if it yields nothing, the string is handed to {@code TreeParser.parse}. All symbols of
         * the resulting tree are added to this algebra's signature.
         *
         * @param str the textual tree
         * @return the parsed tree
         * @throws ParserException wrapping an IOException from reading or a ParseException from TreeParser
         */
        @Override // de.up.ling.irtg.algebra.TreeAlgebra, de.up.ling.irtg.algebra.Algebra
        public Tree<String> parseString(String str) throws ParserException {
            try {
                Tree<String> result = parseFromReader(new StringReader(str));
                if (result == null) {
                    // nothing recognised by the bracket parser: fall back to the generic tree syntax
                    result = TreeParser.parse(str);
                }
                this.signature.addAllSymbols(result);
                return result;
            } catch (ParseException parseFailure) {
                throw new ParserException(parseFailure);
            } catch (IOException readFailure) {
                throw new ParserException(readFailure);
            }
        }

        /**
         * Reads the next tree from {@code reader}.
         *
         * <p>Input is skipped until the start marker "( " (see START_SEQUENCE) is found, then the
         * element is parsed with parseTree(). If that element turns out to be empty or skipped
         * (parseTree() returned {@code null}), scanning continues with the next start marker
         * instead of failing; previously this case raised a NullPointerException.
         *
         * <p>If the parsed wrapper has exactly one child, that child is returned. With several
         * children, each child that has more than one child of its own, or whose only child is
         * itself an inner node, becomes the result and receives the wrapper's other children as
         * additional children on the corresponding side.
         *
         * <p>Resets the word counter returned by getNumWords().
         *
         * @param reader source of bracketed trees
         * @return the next tree, or {@code null} when the input is exhausted
         * @throws IOException if reading fails
         */
        public Tree<String> parseFromReader(Reader reader) throws IOException {
            while (true) {
                this.numWords = 0;
                if (!skipToTreeStart(reader)) {
                    return null;
                }

                Tree<String> root = parseTree(reader);
                if (root == null) {
                    // empty/skipped element or premature EOF (the next scan then hits EOF and returns null)
                    continue;
                }

                List<Tree<String>> children = root.getChildren();
                if (children.isEmpty()) {
                    return null;
                }
                if (children.size() == 1) {
                    return children.get(0);
                }

                for (int i = 0; i < children.size(); i++) {
                    List<Tree<String>> grandChildren = children.get(i).getChildren();
                    if (grandChildren.isEmpty()) {
                        // bare token: cannot be the main constituent (used to throw IndexOutOfBoundsException)
                        continue;
                    }
                    if (grandChildren.size() > 1 || !grandChildren.get(0).getChildren().isEmpty()) {
                        root = children.get(i);
                        // siblings to the left go to the front ...
                        for (int left = 0; left < i; left++) {
                            root.getChildren().add(left, children.get(left));
                        }
                        // ... siblings to the right are appended
                        for (int right = i + 1; right < children.size(); right++) {
                            root.getChildren().add(children.get(right));
                        }
                        // NOTE(review): as in the original there is no break here, so a later
                        // qualifying sibling becomes the root as well -- confirm this is intended.
                    }
                }
                return root;
            }
        }

        /**
         * Consumes characters up to and including the next '(' that is directly followed by a
         * space, i.e. the two characters of START_SEQUENCE.
         *
         * @return {@code true} if the marker was found, {@code false} on end of input
         */
        private boolean skipToTreeStart(Reader reader) throws IOException {
            boolean previousWasParen = false;
            while (true) {
                int c = reader.read();
                if (c == -1) {
                    return false;
                }
                if (previousWasParen && c == ' ') {
                    return true;
                }
                previousWasParen = (c == '(');
            }
        }

        /**
         * Returns the part of a tag before its first '-' or '=' (group 1 of STRIP_PATTERN),
         * or the whole text if the pattern does not match.
         *
         * @param stringBuffer buffer holding the raw tag; it is not modified
         * @return the stripped tag
         */
        private String stripPosTag(StringBuffer stringBuffer) {
            String raw = stringBuffer.toString();
            Matcher matcher = STRIP_PATTERN.matcher(raw);
            // the original also allocated an unused StringBuffer here; it had no effect
            return matcher.matches() ? matcher.group(1) : raw;
        }

        /**
         * Recursively parses one bracketed element. It is called after the element's opening
         * '(' has been consumed and reads up to and including the matching ')'.
         *
         * The first token becomes the node label (after stripPosTag); a token still pending at
         * the closing bracket becomes a lower-cased word leaf and increments numWords.
         *
         * @param reader character source positioned just after an opening bracket
         * @return the parsed node, or null if the input ended prematurely, the element
         *         ended up without children, or the element was skipped
         * @throws IOException if reading fails
         */
        private Tree<String> parseTree(Reader reader) throws IOException {
            // label of this node; empty until the first token has been completed
            String str = "";
            // characters of the token currently being read
            StringBuffer stringBuffer = new StringBuffer();
            // children collected so far
            ArrayList arrayList = new ArrayList();
            while (true) {
                int read = reader.read();
                if (read == -1) {
                    log.log(Level.SEVERE, "Unexpected end of parsed input.");
                    return null;
                }
                // 40 == '(' : nested element; null results (empty/skipped elements) are dropped
                if (read == 40) {
                    Tree<String> parseTree = parseTree(reader);
                    if (parseTree != null) {
                        arrayList.add(parseTree);
                    }
                } else {
                    // 41 == ')' : end of this element
                    if (read == 41) {
                        if (stringBuffer.length() > 0) {
                            if (str.isEmpty()) {
                                // the only token seen is the label itself
                                str = stripPosTag(stringBuffer);
                            } else {
                                // pending token after the label: a word leaf, stored lower-cased
                                arrayList.add(Tree.create(stringBuffer.toString().toLowerCase(), new Tree[0]));
                                this.numWords++;
                            }
                        }
                        if (arrayList.isEmpty()) {
                            return null;
                        }
                        if (arrayList.size() == 1) {
                            Tree<String> tree = (Tree) arrayList.get(0);
                            // collapse a unary chain X -> X: an only child that is an inner node
                            // with the same label replaces this node
                            if (!tree.getChildren().isEmpty() && tree.getLabel().equals(str)) {
                                return tree;
                            }
                        }
                        return Tree.create(str, arrayList);
                    }
                    // 32 == ' ' : terminates the label token; once the label is set a space
                    // does not reset the buffer, so later characters keep accumulating in it
                    if (read == 32) {
                        if (stringBuffer.length() > 0 && str.isEmpty()) {
                            str = stripPosTag(stringBuffer);
                            stringBuffer = new StringBuffer();
                        }
                    } else {
                        // 45 == '-' as the very first label character: the whole element is
                        // discarded (presumably PTB null elements such as -NONE- -- TODO confirm)
                        if (read == 45 && stringBuffer.length() == 0 && str.isEmpty()) {
                            skipElement(reader);
                            return null;
                        }
                        // 91 == '[' anywhere in the element: discard the element
                        if (read == 91) {
                            skipElement(reader);
                            return null;
                        }
                        // any other character above the space code point is part of the current
                        // token; lower code points (e.g. line breaks, tabs) are ignored
                        if (read > 32) {
                            stringBuffer.append((char) read);
                        }
                    }
                }
            }
        }

        /**
         * Discards input up to and including the next ')' character, or up to the end of the
         * input if no closing bracket follows.
         *
         * @param reader character source to advance
         * @throws IOException if reading fails
         */
        private void skipElement(Reader reader) throws IOException {
            int c = reader.read();
            while (c != -1 && c != ')') {
                c = reader.read();
            }
        }

        /**
         * Rebuilds a tree bottom-up so that no node has more than two children, and relabels
         * inner nodes.
         *
         * <ul>
         *   <li>Leaves are returned as they are.</li>
         *   <li>An inner node's label becomes {@code label + numberOfChildren}.</li>
         *   <li>With parent annotation enabled, and if the first child is itself an inner node,
         *       every child label gets "^" plus the new parent label appended (the child trees
         *       are modified in place via setLabel).</li>
         *   <li>Nodes with three or more children keep their first child; the remaining children
         *       are nested under artificial nodes built by binarize().</li>
         * </ul>
         *
         * @param tree the tree to transform
         * @return the binarized, relabeled tree
         */
        public Tree<String> binarizeAndRelabel(Tree<String> tree) {
            // The decompiler's cast to TreeVisitor<String, Down, Up> referenced type variables that
            // are not in scope here; the visitor's own type arguments are sufficient.
            return tree.dfs(new TreeVisitor<String, Void, Tree<String>>() {
                @Override // de.up.ling.tree.TreeVisitor
                public Tree<String> combine(Tree<String> node, List<Tree<String>> childResults) {
                    if (childResults.isEmpty()) {
                        return node;
                    }

                    String label = node.getLabel() + childResults.size();

                    if (PtbTreeAlgebra.this.useParentAnnotation && !childResults.get(0).getChildren().isEmpty()) {
                        for (Tree<String> child : childResults) {
                            child.setLabel(child.getLabel() + "^" + label);
                        }
                    }

                    if (childResults.size() <= 2) {
                        return Tree.create(label, childResults);
                    }

                    // three or more children: first child plus a right-branching artificial chain
                    List<Tree<String>> pair = new ArrayList<>();
                    pair.add(childResults.get(0));
                    pair.add(PtbTreeAlgebra.this.binarize(childResults, 1));
                    return Tree.create(label, pair);
                }
            });
        }

        /**
         * Builds the artificial right-branching node that covers {@code list[i..]}: its first
         * child is {@code list.get(i)} and, if further elements follow, its second child is the
         * artificial node for {@code list[i+1..]}. The node label is LABEL_PREFIX followed by
         * concatLabels(list, i).
         *
         * @param list sibling trees being binarized
         * @param i index of the first sibling covered by the new node
         * @return the artificial node
         */
        public Tree<String> binarize(List<Tree<String>> list, int i) {
            List<Tree<String>> children = new ArrayList<>();
            children.add(list.get(i));

            String label = LABEL_PREFIX + concatLabels(list, i);

            int next = i + 1;
            boolean moreSiblings = next < list.size();
            if (moreSiblings) {
                children.add(binarize(list, next));
            }
            return Tree.create(label, children);
        }

        /**
         * Joins the labels of {@code list[i..]} with the separator
         * {@code RuleBasedTransactionAttribute.PREFIX_ROLLBACK_RULE}.
         *
         * <p>With parent annotation enabled, each label of the form "name^parent" contributes
         * only "name", and "^parent" of the first label is appended once at the end. A label
         * without '^' is used unchanged, and no suffix is appended if the first label carries no
         * annotation; previously such labels caused an IllegalStateException because the match
         * result was not checked.
         *
         * @param list sibling trees
         * @param i index of the first sibling to include; must be a valid index
         * @return the concatenated label
         * @throws IndexOutOfBoundsException if {@code i} is not a valid index of {@code list}
         */
        public String concatLabels(List<Tree<String>> list, int i) {
            // NOTE(review): the separator constant comes from Spring and is documented there as
            // "-"; its use here is presumably a decompiler substitution for a plain literal.
            final String separator = RuleBasedTransactionAttribute.PREFIX_ROLLBACK_RULE;

            // fail fast on an invalid index, as the original did
            String firstLabel = list.get(i).getLabel();

            String parentSuffix = null;
            if (this.useParentAnnotation) {
                Matcher first = CONCAT_PATTERN.matcher(firstLabel);
                if (first.matches()) {
                    parentSuffix = first.group(3);
                }
            }

            StringBuilder sb = new StringBuilder();
            for (int k = i; k < list.size(); k++) {
                if (k > i) {
                    sb.append(separator);
                }
                String label = list.get(k).getLabel();
                if (this.useParentAnnotation) {
                    Matcher matcher = CONCAT_PATTERN.matcher(label);
                    if (matcher.matches()) {
                        label = matcher.group(1);
                    }
                }
                sb.append(label);
            }

            if (parentSuffix != null) {
                sb.append("^").append(parentSuffix);
            }
            return sb.toString();
        }

        // Compiler-generated bridge (marked bridge/synthetic by the decompiler): forwards the
        // raw-typed call to evaluate(Tree<String>).
        // NOTE(review): in hand-maintained source this method has the same name and erased
        // parameter type as evaluate(Tree<String>) and would presumably have to be removed --
        // confirm before compiling this file.
        @Override // de.up.ling.irtg.algebra.TreeAlgebra, de.up.ling.irtg.algebra.Algebra
        public /* bridge */ /* synthetic */ Tree<String> evaluate(Tree tree) {
            return evaluate((Tree<String>) tree);
        }
    }

    /**
     * Command-line entry point: reads bracketed trees, derives a grammar and an annotated
     * corpus from them, and writes both to files named after the input file. The output names
     * contain no directory part, so they are resolved against the working directory.
     *
     * <p>Arguments (all optional, positional):
     * <ol>
     *   <li>input file; a name ending in ".gz" is read through GZIP (default "examples/ptb-test.mrg")</li>
     *   <li>interpretation whose string length the corpus is sorted by; only HtmlTags.I and
     *       "ptb" are accepted, anything else is reported on stderr and ignored</li>
     *   <li>maximum number of words per sentence (default 15)</li>
     *   <li>any value other than "noconversion" makes the corpus file end in
     *       "-corpus-training.txt" instead of "-corpus-testing.txt"</li>
     * </ol>
     *
     * @param strArr command-line arguments
     * @throws IOException if reading the input or writing an output file fails
     * @throws NumberFormatException if the third argument is not an integer
     */
    public static void main(String[] strArr) throws IOException {
        String inputFile = strArr.length > 0 ? strArr[0] : "examples/ptb-test.mrg";
        String sortInterpretation = strArr.length > 1 ? strArr[1] : null;
        int maxWords = strArr.length > 2 ? Integer.parseInt(strArr[2]) : TOKEN_SIZE;
        boolean trainingCorpus = strArr.length > 3 && !strArr[3].equals("noconversion");

        List<String> permitted = new ArrayList<>();
        permitted.add(HtmlTags.I);
        permitted.add("ptb");
        if (sortInterpretation != null && !permitted.contains(sortInterpretation)) {
            System.err.println("Ignore sorting request on interpretation '" + sortInterpretation + "'.");
            System.err.println("Permitted interpretations: " + permitted);
            sortInterpretation = null;
        }

        String prefix = getFilenamePrefix(inputFile);
        String corpusFile = prefix + (trainingCorpus ? "-corpus-training.txt" : "-corpus-testing.txt");
        PTBConverter converter = new PTBConverter(maxWords);

        log.info("Reading PTB data...");
        // the input reader used to be left open; close it as soon as all trees are read
        try (Reader input = getReaderForFilename(inputFile)) {
            converter.read(input);
        }
        converter.initGrammar();

        log.info("Converting PTB trees...");
        converter.convert(sortInterpretation);
        log.log(Level.INFO, "Converted rules: {0}", String.valueOf(converter.ruleMap.size()));

        log.info("Writing grammar...");
        // writeGrammar() closes the writer itself; the enclosing try also covers failures
        // before that point (a second close is harmless)
        try (Writer grammarOut = new FileWriter(prefix + "-grammar.irtg")) {
            converter.writeGrammar(grammarOut);
        }

        log.info("Writing corpus...");
        // closed even if writing an instance fails
        try (FileWriter corpusOut = new FileWriter(corpusFile)) {
            converter.corpus.forEach(new CorpusWriter(converter.irtg, null, corpusOut));
        }
        log.info("Done.");
    }

    /**
     * Opens a character reader for a file. Names ending in ".gz" are decompressed with GZIP
     * while reading; all other files are read directly.
     *
     * @param str path of the file to open
     * @return a reader over the file's contents
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if the GZIP stream cannot be initialised
     */
    private static Reader getReaderForFilename(String str) throws FileNotFoundException, IOException {
        if (str.endsWith(".gz")) {
            return new InputStreamReader(new GZIPInputStream(new FileInputStream(str)));
        }
        return new FileReader(str);
    }

    /**
     * Derives the base name used for output files: the last path component of {@code str}
     * with a trailing ".mrg" or ".mrg.gz" removed. Other names are returned unchanged.
     *
     * @param str path of the input file
     * @return the file name without directory and without the recognised suffix
     */
    private static String getFilenamePrefix(String str) {
        String baseName = new File(str).getName();
        String[] suffixes = {".mrg", ".mrg.gz"};
        for (String suffix : suffixes) {
            if (baseName.endsWith(suffix)) {
                return baseName.substring(0, baseName.length() - suffix.length());
            }
        }
        return baseName;
    }

    /**
     * Creates a converter and switches the class logger to {@code Level.ALL}.
     *
     * @param maxTerminalsPerSentence trees with more words than this are skipped by read()
     */
    public PTBConverter(int maxTerminalsPerSentence) {
        log.setLevel(Level.ALL);
        this.maxTerminalsPerSentence = maxTerminalsPerSentence;
    }

    /**
     * Creates an empty grammar over a fresh ConcreteTreeAutomaton and registers two
     * interpretations on it: a StringAlgebra under the name HtmlTags.I and a
     * PtbTreeAlgebra (with parent annotation) under the name "ptb". The two
     * homomorphisms are kept in hStr and hPtb.
     */
    public void initGrammar() {
        InterpretedTreeAutomaton grammar = new InterpretedTreeAutomaton(new ConcreteTreeAutomaton());
        this.irtg = grammar;

        // string interpretation
        StringAlgebra strings = new StringAlgebra();
        Homomorphism toStrings = new Homomorphism(grammar.getAutomaton().getSignature(), strings.getSignature());
        this.hStr = toStrings;
        grammar.addInterpretation(HtmlTags.I, new Interpretation(strings, toStrings));

        // tree interpretation
        PtbTreeAlgebra trees = new PtbTreeAlgebra(true);
        Homomorphism toTrees = new Homomorphism(grammar.getAutomaton().getSignature(), trees.getSignature());
        this.hPtb = toTrees;
        grammar.addInterpretation("ptb", new Interpretation(trees, toTrees));
    }

    /**
     * Reads trees from {@code reader} until the parser returns {@code null}, keeping every
     * tree whose word count does not exceed maxTerminalsPerSentence.
     *
     * @param reader source of bracketed trees; not closed by this method
     * @throws IOException if reading fails
     */
    private void read(Reader reader) throws IOException {
        PtbTreeAlgebra parser = new PtbTreeAlgebra(true);
        Tree<String> tree = parser.parseFromReader(reader);
        while (tree != null) {
            boolean shortEnough = parser.getNumWords() <= this.maxTerminalsPerSentence;
            if (shortEnough) {
                this.ptbTrees.add(tree);
            }
            tree = parser.parseFromReader(reader);
        }
    }

    /**
     * Turns every tree collected by read() into a corpus instance.
     *
     * <p>For each tree, rules are extracted from its binarized form (which also yields the
     * instance's derivation tree), the tree's root label is registered as a final state, and
     * the instance gets two input objects: the tree itself under "ptb" and its leaf labels
     * joined by single spaces under HtmlTags.I.
     *
     * @param str name of the interpretation whose {@code toString()} length the instances are
     *            sorted by (ascending), or {@code null} to keep input order
     */
    private void convert(final String str) {
        ConcreteTreeAutomaton automaton = (ConcreteTreeAutomaton) this.irtg.getAutomaton();
        PtbTreeAlgebra algebra = new PtbTreeAlgebra(true);

        List<Instance> instances = new ArrayList<>();
        for (Tree<String> tree : this.ptbTrees) {
            Map<String, Object> inputs = new HashMap<>();
            inputs.put("ptb", tree);
            Tree<Integer> derivation = extractRules(algebra.binarizeAndRelabel(tree));
            automaton.addFinalState(automaton.addState(tree.getLabel()));
            inputs.put(HtmlTags.I, StringTools.join(tree.getLeafLabels(), " "));

            Instance instance = new Instance();
            instance.setInputObjects(inputs);
            instance.setDerivationTree(derivation);
            instances.add(instance);
        }

        if (str != null) {
            Collections.sort(instances, new Comparator<Instance>() {
                @Override // java.util.Comparator
                public int compare(Instance left, Instance right) {
                    // Integer.compare instead of subtraction: same ordering, no overflow concerns
                    return Integer.compare(
                            left.getInputObjects().get(str).toString().length(),
                            right.getInputObjects().get(str).toString().length());
                }
            });
        }

        for (Instance instance : instances) {
            this.corpus.addInstance(instance);
        }
    }

    /**
     * Walks a binarized tree bottom-up, registering one grammar rule per distinct
     * "parent/child/..." configuration (see nodeToRuleString) and returning the derivation
     * tree made of the rules' symbol ids.
     *
     * <p>For every inner node:
     * <ul>
     *   <li>the rule symbol is looked up in ruleMap or newly added to the source signature
     *       (name: {@code PrincetonRandomAccessDictionaryFile.READ_ONLY} followed by a running
     *       number);</li>
     *   <li>the string homomorphism image is set on every visit: the concatenation term from
     *       computeInterpretation() for nodes over inner nodes, or the word constant for a
     *       node whose single child is a leaf;</li>
     *   <li>only for a new rule, the tree homomorphism image and the automaton rule are added;</li>
     *   <li>the label of the root node is registered as a final state.</li>
     * </ul>
     *
     * <p>This replaces decompiler output that could not be compiled (a cast to out-of-scope
     * type variables, a static initializer in an anonymous class emulating {@code assert},
     * and casts of a String and a List to ConcreteTreeAutomaton); the behavior is the same.
     *
     * @param tree binarized, relabeled tree
     * @return the derivation tree; {@code null} if {@code tree} is a single leaf
     */
    private Tree<Integer> extractRules(final Tree<String> tree) {
        final ConcreteTreeAutomaton automaton = (ConcreteTreeAutomaton) this.irtg.getAutomaton();
        final Homomorphism stringHom = this.hStr;
        final Homomorphism treeHom = this.hPtb;
        final Map<String, Integer> rules = this.ruleMap;

        return tree.dfs(new TreeVisitor<String, Void, Tree<Integer>>() {
            @Override // de.up.ling.tree.TreeVisitor
            public Tree<Integer> combine(Tree<String> node, List<Tree<Integer>> childResults) {
                if (childResults.isEmpty()) {
                    // leaves (words) produce no rule of their own
                    return null;
                }

                String ruleKey = PTBConverter.this.nodeToRuleString(node);
                boolean ruleExists = rules.containsKey(ruleKey);
                int ruleSymbol;
                if (ruleExists) {
                    ruleSymbol = rules.get(ruleKey).intValue();
                } else {
                    // NOTE(review): the name prefix constant is presumably a decompiler
                    // substitution for a short string literal -- confirm its value.
                    String ruleName = PrincetonRandomAccessDictionaryFile.READ_ONLY + String.valueOf(rules.size() + 1);
                    ruleSymbol = stringHom.getSourceSignature().addSymbol(ruleName, childResults.size());
                    rules.put(ruleKey, Integer.valueOf(ruleSymbol));
                }

                List<Tree<HomomorphismSymbol>> treeHomArgs = new ArrayList<>();
                List<Tree<Integer>> derivationChildren = new ArrayList<>();
                List<String> childStates = new ArrayList<>();

                if (childResults.get(0) != null) {
                    // children are inner nodes: one variable and one child state per child
                    for (int i = 0; i < childResults.size(); i++) {
                        childStates.add(node.getChildren().get(i).getLabel());
                        derivationChildren.add(childResults.get(i));
                        treeHomArgs.add(Tree.create(HomomorphismSymbol.createVariable("?" + String.valueOf(i + 1)), new Tree[0]));
                    }
                    stringHom.add(ruleSymbol, PTBConverter.this.computeInterpretation(1, childResults.size(), stringHom.getTargetSignature()));
                } else {
                    // the first child is a leaf: this node is expected to cover exactly one word
                    assert childResults.size() == 1;
                    Tree<HomomorphismSymbol> word = Tree.create(HomomorphismSymbol.createConstant(node.getChildren().get(0).getLabel(), stringHom.getTargetSignature(), 0), new Tree[0]);
                    stringHom.add(ruleSymbol, word);
                    treeHomArgs.add(word);
                }

                if (!ruleExists) {
                    treeHom.add(ruleSymbol, Tree.create(HomomorphismSymbol.createConstant(node.getLabel(), treeHom.getTargetSignature(), 0), treeHomArgs));
                    automaton.addRule(automaton.createRule(node.getLabel(), stringHom.getSourceSignature().resolveSymbolId(ruleSymbol), childStates));
                }

                if (node == tree) {
                    automaton.addFinalState(automaton.getIdForState(node.getLabel()));
                }
                return Tree.create(Integer.valueOf(ruleSymbol), derivationChildren);
            }
        });
    }

    /**
     * Builds the key under which a node's rule is stored in ruleMap: the node's label
     * followed, for each child in order, by {@code AntPathMatcher.DEFAULT_PATH_SEPARATOR}
     * and the child's label.
     *
     * @param tree the node to describe
     * @return the rule key
     */
    public String nodeToRuleString(Tree<String> tree) {
        StringBuilder key = new StringBuilder();
        key.append(tree.getLabel());
        List<Tree<String>> children = tree.getChildren();
        for (int index = 0; index < children.size(); index++) {
            key.append(AntPathMatcher.DEFAULT_PATH_SEPARATOR).append(children.get(index).getLabel());
        }
        return key.toString();
    }

    /**
     * Builds the right-nested term {@code *(?i, *(?i+1, ... ?i2))} that combines the variables
     * numbered {@code i} through {@code i2} with the binary "*" constant.
     *
     * @param i number of the first variable
     * @param i2 number of the last variable; if it is 1 the result is just the variable "?1"
     * @param signature signature in which the "*" constant is created
     * @return the term, or {@code null} if {@code i2 < i} (and {@code i2 != 1})
     */
    public Tree<HomomorphismSymbol> computeInterpretation(int i, int i2, Signature signature) {
        if (i2 == 1) {
            return Tree.create(HomomorphismSymbol.createVariable("?1"), new Tree[0]);
        }
        if (i > i2) {
            return null;
        }

        Tree<HomomorphismSymbol> left = Tree.create(HomomorphismSymbol.createVariable("?" + String.valueOf(i)), new Tree[0]);
        Tree<HomomorphismSymbol> right;
        if (i + 1 != i2) {
            // more than two variables left: nest the remainder
            right = computeInterpretation(i + 1, i2, signature);
        } else {
            right = Tree.create(HomomorphismSymbol.createVariable("?" + String.valueOf(i2)), new Tree[0]);
        }

        List<Tree<HomomorphismSymbol>> operands = new ArrayList<>();
        operands.add(left);
        operands.add(right);
        return Tree.create(HomomorphismSymbol.createConstant("*", signature, operands.size()), operands);
    }

    /**
     * Writes the grammar's {@code toString()} form to {@code writer} and closes the writer.
     * The writer is closed even if writing fails.
     *
     * @param writer destination; closed by this method
     * @throws IOException if writing or closing fails
     */
    private void writeGrammar(Writer writer) throws IOException {
        try (Writer out = writer) {
            out.write(this.irtg.toString());
        }
    }
}
