/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.trees.international.pennchinese;

import edu.stanford.nlp.io.EncodingPrintWriter;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.trees.BobChrisTreeNormalizer;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.international.pennchinese.CTBTreeReaderFactory;
import edu.stanford.nlp.trees.international.pennchinese.ChineseTreebankLanguagePack;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.trees.tregex.tsurgeon.Tsurgeon;
import edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.function.Predicate;
import java.util.regex.Pattern;

public class CTBErrorCorrectingTreeNormalizer
extends BobChrisTreeNormalizer {
    private static final long serialVersionUID = -8203853817025401845L;
    private static final Pattern NPTmpPattern = Pattern.compile("NP.*-TMP.*");
    private static final Pattern PPTmpPattern = Pattern.compile("PP.*-TMP.*");
    private static final Pattern TmpPattern = Pattern.compile(".*-TMP.*");
    private static final boolean DEBUG = System.getProperty("CTBErrorCorrectingTreeNormalizer") != null;
    private final TreeTransformer tagExtender;
    private final boolean splitNPTMP;
    private final boolean splitPPTMP;
    private final boolean splitXPTMP;
    private final Predicate<Tree> chineseEmptyFilter = new ChineseEmptyFilter();
    private static final TregexPattern[] fixupTregex = new TregexPattern[]{TregexPattern.compile("PU=punc < \u5979\uff5b"), TregexPattern.compile("@NP <1 (@NP <1 NR <2 (PU=bad < /^\uff1c$/)) <2 (FLR=dest <2 (NT < /\uff25\uff4e\uff47\uff4c\uff49\uff53\uff48/))"), TregexPattern.compile("@IP < (FLR=dest <: (PU < /^\u3008$/) $. (__=bad1 $. (PU=bad2 < /^\u3009$/)))"), TregexPattern.compile("@DFL|FLR|IMG|SKIP=junk <<, (PU < /^[\u3008\uff5b{\uff1c\\[\uff3b]$/) <<- (PU < /^[\u3009\uff5d}\uff1e\\]\uff3d]$/)  <3 __"), TregexPattern.compile("WHPP=bad")};
    private static final TsurgeonPattern[] fixupTsurgeon;

    public CTBErrorCorrectingTreeNormalizer() {
        this(false, false, false, false);
    }

    public CTBErrorCorrectingTreeNormalizer(boolean splitNPTMP, boolean splitPPTMP, boolean splitXPTMP, boolean charTags) {
        this.splitNPTMP = splitNPTMP;
        this.splitPPTMP = splitPPTMP;
        this.splitXPTMP = splitXPTMP;
        if (charTags) {
            try {
                this.tagExtender = (TreeTransformer)Class.forName("edu.stanford.nlp.trees.international.pennchinese.CharacterLevelTagExtender").newInstance();
            }
            catch (Exception e) {
                throw new RuntimeException(e);
            }
        } else {
            this.tagExtender = null;
        }
    }

    @Override
    protected String cleanUpLabel(String label) {
        if (label == null) {
            return "ROOT";
        }
        boolean nptemp = NPTmpPattern.matcher(label).matches();
        boolean pptemp = PPTmpPattern.matcher(label).matches();
        boolean anytemp = TmpPattern.matcher(label).matches();
        label = this.tlp.basicCategory(label);
        if (anytemp && this.splitXPTMP) {
            label = label + "-TMP";
        } else if (pptemp && this.splitPPTMP) {
            label = label + "-TMP";
        } else if (nptemp && this.splitNPTMP) {
            label = label + "-TMP";
        }
        return label;
    }

    @Override
    public Tree normalizeWholeTree(Tree tree, TreeFactory tf) {
        Tree newTree = tree.prune(this.chineseEmptyFilter, tf).spliceOut(this.aOverAFilter);
        Tree[] kids = newTree.children();
        if (kids.length > 1) {
            EncodingPrintWriter.err.println("Possible error: non-unary initial rewrite: " + newTree.localTree(), "utf-8");
        } else if (kids.length > 0) {
            Tree child = kids[0];
            if (!child.isPhrasal()) {
                if (DEBUG) {
                    EncodingPrintWriter.err.println("Correcting error: treebank tree is not phrasal; wrapping in FRAG: " + child, "utf-8");
                }
                Tree added = tf.newTreeNode("FRAG", Arrays.asList(kids));
                newTree.setChild(0, added);
            } else if (child.label().value().equals("META")) {
                EncodingPrintWriter.err.println("Deleting META tree that should be XML metadata in chtb_5200.df: " + child, "utf-8");
                return null;
            }
        } else {
            EncodingPrintWriter.err.println("Error: tree with no children: " + tree, "utf-8");
        }
        for (Tree subtree : newTree) {
            Tree subsubtree;
            if (subtree.value().equals("CP") && subtree.numChildren() == 1 && (subsubtree = subtree.firstChild()).value().equals("ROOT") && subsubtree.firstChild().isLeaf() && "CP".equals(subsubtree.firstChild().value())) {
                EncodingPrintWriter.err.println("Correcting error: seriously messed up tree in CTB6 (chtb_3095.bn): " + newTree, "utf-8");
                List<Tree> children = subsubtree.getChildrenAsList();
                children = children.subList(1, children.size());
                subtree.setChildren(children);
                EncodingPrintWriter.err.println("  Corrected as:                                                    " + newTree, "utf-8");
            }
            if (subtree.isPreTerminal()) {
                if (subtree.value().matches("NP")) {
                    if (ChineseTreebankLanguagePack.chineseDouHaoAcceptFilter().test(subtree.firstChild().value())) {
                        if (DEBUG) {
                            EncodingPrintWriter.err.println("Correcting error: NP preterminal over douhao; preterminal changed to PU: " + subtree, "utf-8");
                        }
                        subtree.setValue("PU");
                        continue;
                    }
                    if (subtree.parent(newTree).value().matches("NP")) {
                        if (DEBUG) {
                            EncodingPrintWriter.err.println("Correcting error: NP preterminal w/ NP parent; preterminal changed to NN: " + subtree.parent(newTree), "utf-8");
                        }
                        subtree.setValue("NN");
                        continue;
                    }
                    if (DEBUG) {
                        EncodingPrintWriter.err.println("Correcting error: NP preterminal w/o NP parent, changing preterminal to NN: " + subtree.parent(newTree), "utf-8");
                    }
                    subtree.setValue("NN");
                    continue;
                }
                if (!subtree.value().matches("PU")) continue;
                if (subtree.firstChild().value().matches("\u4ed6")) {
                    if (DEBUG) {
                        EncodingPrintWriter.err.println("Correcting error: \"\u4ed6\" under PU tag; tag changed to PN: " + subtree, "utf-8");
                    }
                    subtree.setValue("PN");
                    continue;
                }
                if (subtree.firstChild().value().equals("\u91cc")) {
                    if (DEBUG) {
                        EncodingPrintWriter.err.println("Correcting error: \"" + subtree.firstChild().value() + "\" under PU tag; tag changed to LC: " + subtree, "utf-8");
                    }
                    subtree.setValue("LC");
                    continue;
                }
                if (subtree.firstChild().value().equals("\u662f")) {
                    if (DEBUG) {
                        EncodingPrintWriter.err.println("Correcting error: \"" + subtree.firstChild().value() + "\" under PU tag; tag changed to VC: " + subtree, "utf-8");
                    }
                    subtree.setValue("VC");
                    continue;
                }
                if (subtree.firstChild().value().matches("tw|\u534a\u7a74\u5f0f")) {
                    if (DEBUG) {
                        EncodingPrintWriter.err.println("Correcting error: \"" + subtree.firstChild().value() + "\" under PU tag; tag changed to NN: " + subtree, "utf-8");
                    }
                    subtree.setValue("NN");
                    continue;
                }
                if (!subtree.firstChild().value().matches("33")) continue;
                if (DEBUG) {
                    EncodingPrintWriter.err.println("Correcting error: \"33\" under PU tag; tag changed to CD: " + subtree, "utf-8");
                }
                subtree.setValue("CD");
                continue;
            }
            if (subtree.value().matches("NN")) {
                if (DEBUG) {
                    EncodingPrintWriter.err.println("Correcting error: NN phrasal tag changed to NP: " + subtree, "utf-8");
                }
                subtree.setValue("NP");
                continue;
            }
            if (!subtree.value().matches("MSP")) continue;
            if (DEBUG) {
                EncodingPrintWriter.err.println("Correcting error: MSP phrasal tag changed to VP: " + subtree, "utf-8");
            }
            subtree.setValue("VP");
        }
        for (int i = 0; i < fixupTregex.length; ++i) {
            if (DEBUG) {
                Tree preProcessed = newTree.deepCopy();
                if (preProcessed.equals(newTree = Tsurgeon.processPattern(fixupTregex[i], fixupTsurgeon[i], newTree))) continue;
                EncodingPrintWriter.err.println("Correcting error: Updated tree using tregex " + fixupTregex[i] + " and tsurgeon " + fixupTsurgeon[i], "utf-8");
                EncodingPrintWriter.err.println("  from: " + preProcessed, "utf-8");
                EncodingPrintWriter.err.println("    to: " + newTree, "utf-8");
                continue;
            }
            newTree = Tsurgeon.processPattern(fixupTregex[i], fixupTsurgeon[i], newTree);
        }
        if (newTree.numChildren() == 0) {
            if (DEBUG) {
                EncodingPrintWriter.err.println("Deleting tree that now has no contents: " + newTree, "utf-8");
            }
            return null;
        }
        if (this.tagExtender != null) {
            newTree = this.tagExtender.transformTree(newTree);
        }
        return newTree;
    }

    static {
        if (fixupTregex.length != (fixupTsurgeon = new TsurgeonPattern[]{Tsurgeon.parseOperation("replace punc (PN \u5979) (PU \uff5b)"), Tsurgeon.parseOperation("move bad >1 dest"), Tsurgeon.parseOperation("[move bad1 >-1 dest] [move bad2 >-1 dest]"), Tsurgeon.parseOperation("delete junk"), Tsurgeon.parseOperation("relabel bad PP")}).length) {
            throw new AssertionError((Object)"fixupTregex and fixupTsurgeon have different lengths in CTBErrorCorrectingTreeNormalizer.");
        }
    }

    public static class CTBErrorCorrectingTreeReaderFactory
    extends CTBTreeReaderFactory {
        public CTBErrorCorrectingTreeReaderFactory() {
            super(new CTBErrorCorrectingTreeNormalizer(false, false, false, false));
        }
    }

    private static class ChineseEmptyFilter
    implements Predicate<Tree>,
    Serializable {
        private static final long serialVersionUID = 8914098359495987617L;

        private ChineseEmptyFilter() {
        }

        @Override
        public boolean test(Tree t) {
            Tree[] kids = t.children();
            Label l = t.label();
            if (l != null && l.value() != null && l.value().matches("-NONE-.*") && !t.isLeaf() && kids.length == 1 && kids[0].isLeaf()) {
                if (!l.value().equals("-NONE-")) {
                    EncodingPrintWriter.err.println("Deleting errant node " + l.value() + " as if -NONE-: " + t, "utf-8");
                }
                return false;
            }
            return true;
        }
    }
}

