/*
 * Decompiled with CFR 0.152.
 */
package com.limegroup.gnutella.spam;

import com.limegroup.gnutella.RemoteFileDesc;
import com.limegroup.gnutella.URN;
import com.limegroup.gnutella.messages.QueryRequest;
import com.limegroup.gnutella.spam.AddressToken;
import com.limegroup.gnutella.spam.KeywordToken;
import com.limegroup.gnutella.spam.SizeToken;
import com.limegroup.gnutella.spam.Token;
import com.limegroup.gnutella.spam.UrnToken;
import com.limegroup.gnutella.spam.VendorToken;
import com.limegroup.gnutella.spam.XMLKeywordToken;
import com.limegroup.gnutella.xml.LimeXMLDocument;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Static utility that breaks {@link RemoteFileDesc}s and {@link QueryRequest}s
 * down into the {@link Token}s used by the spam package.
 *
 * <p>For a file descriptor the tokens are: keyword tokens from the file name,
 * XML keyword tokens from the attached XML document (if any), a URN token
 * (if the descriptor has a SHA1 URN), a size token, a vendor token and an
 * address token. For a query the tokens are keyword tokens from the query
 * string, XML keyword tokens from the rich query (if any) and one URN token
 * per queried URN.
 *
 * <p>This class cannot be instantiated.
 */
public class Tokenizer {
    private static final Log LOG = LogFactory.getLog(Tokenizer.class);

    /** Words shorter than this many bytes do not become stand-alone keyword tokens. */
    private static final int MIN_KEYWORD_LENGTH = 3;

    /** Words longer than this many bytes are truncated to this length. */
    private static final int MAX_KEYWORD_LENGTH = 8;

    /** Characters that separate words in file names, queries and XML values. */
    private static final String KEYWORD_DELIMITERS = " -._+/*()\\,\t";

    /** Separator between the components of a canonical XML field name. */
    private static final String FIELD_DELIMITER = "__";

    private Tokenizer() {
    }

    /**
     * Tokenizes a single file descriptor.
     *
     * @param desc the descriptor to tokenize
     * @return the distinct tokens extracted from {@code desc}
     */
    public static Token[] getTokens(RemoteFileDesc desc) {
        Set<Token> set = new HashSet<Token>();
        addTokens(set, desc);
        return toArray(set);
    }

    /**
     * Tokenizes several file descriptors at once.
     *
     * @param descs the descriptors to tokenize
     * @return the distinct tokens extracted from all of {@code descs};
     *         a token shared by several descriptors appears once
     */
    public static Token[] getTokens(RemoteFileDesc[] descs) {
        Set<Token> set = new HashSet<Token>();
        for (RemoteFileDesc desc : descs) {
            addTokens(set, desc);
        }
        return toArray(set);
    }

    /**
     * Tokenizes a query.
     *
     * @param qr the query to tokenize
     * @return the distinct keyword and URN tokens extracted from {@code qr}
     */
    public static Token[] getTokens(QueryRequest qr) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("tokenizing: " + qr);
        }
        Set<Token> set = new HashSet<Token>();
        set.addAll(getKeywordTokens(qr));
        set.addAll(getUrnTokens(qr));
        return toArray(set);
    }

    /**
     * Adds every token of {@code desc} to {@code set}; shared by both
     * descriptor-based {@code getTokens} overloads.
     */
    private static void addTokens(Set<Token> set, RemoteFileDesc desc) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("tokenizing: " + desc);
        }
        set.addAll(getKeywordTokens(desc));
        // A descriptor without a SHA1 URN contributes no URN token.
        Token urnToken = getUrnToken(desc);
        if (urnToken != null) {
            set.add(urnToken);
        }
        set.add(getSizeToken(desc));
        set.add(getVendorToken(desc));
        set.add(getAddressToken(desc));
    }

    /** Copies {@code set} into a new array sized to fit exactly. */
    private static Token[] toArray(Set<Token> set) {
        return set.toArray(new Token[set.size()]);
    }

    /**
     * @return a URN token for the descriptor's SHA1 URN, or {@code null}
     *         if the descriptor has none
     */
    private static Token getUrnToken(RemoteFileDesc desc) {
        if (desc.getSHA1Urn() != null) {
            return new UrnToken(desc.getSHA1Urn());
        }
        return null;
    }

    /**
     * @return one URN token per URN returned by {@code qr.getQueryUrns()};
     *         an empty set if there are none
     */
    private static Set<Token> getUrnTokens(QueryRequest qr) {
        Set<?> urns = qr.getQueryUrns();
        if (urns.isEmpty()) {
            return Collections.<Token>emptySet();
        }
        Set<Token> ret = new HashSet<Token>();
        for (Object urn : urns) {
            ret.add(new UrnToken((URN) urn));
        }
        return ret;
    }

    /** @return a size token built from {@code desc.getSize()} */
    private static Token getSizeToken(RemoteFileDesc desc) {
        return new SizeToken(desc.getSize());
    }

    /** @return the token {@code VendorToken.getToken} yields for the descriptor's vendor */
    private static Token getVendorToken(RemoteFileDesc desc) {
        return VendorToken.getToken(desc.getVendor());
    }

    /** @return an address token built from the descriptor's raw IP address and port */
    private static Token getAddressToken(RemoteFileDesc desc) {
        return new AddressToken(desc.getInetAddress().getAddress(), desc.getPort());
    }

    /** @return keyword tokens from the descriptor's file name and XML document */
    private static Set<Token> getKeywordTokens(RemoteFileDesc desc) {
        return getKeywordTokens(desc.getFileName(), desc.getXMLDocument());
    }

    /** @return keyword tokens from the query string and rich (XML) query */
    private static Set<Token> getKeywordTokens(QueryRequest qr) {
        return getKeywordTokens(qr.getQuery(), qr.getRichQuery());
    }

    /**
     * Builds keyword tokens from a plain string plus an optional XML document.
     * All text is lower-cased with {@link Locale#US} before tokenizing.
     *
     * @param fname the file name or query string
     * @param doc   the XML document whose name/value pairs are tokenized as
     *              well; may be {@code null}
     * @return a mutable set holding the keyword and XML keyword tokens
     */
    private static Set<Token> getKeywordTokens(String fname, LimeXMLDocument doc) {
        Set<Token> tokens = getKeywordTokens(fname.toLowerCase(Locale.US));
        if (doc != null) {
            Iterator<?> iter = doc.getNameValueSet().iterator();
            while (iter.hasNext()) {
                Map.Entry<?, ?> entry = (Map.Entry<?, ?>) iter.next();
                String name = entry.getKey().toString().toLowerCase(Locale.US);
                String value = entry.getValue().toString().toLowerCase(Locale.US);
                tokens.addAll(getXMLKeywords(name, value));
            }
        }
        return tokens;
    }

    /**
     * Builds one XML keyword token per word of {@code value}, each tagged with
     * the simple field name extracted from {@code name}. Words shorter than
     * {@code MIN_KEYWORD_LENGTH} bytes are dropped; words longer than
     * {@code MAX_KEYWORD_LENGTH} bytes are truncated.
     *
     * @param name  the canonical XML field name
     * @param value the field's value
     * @return the XML keyword tokens for this field
     */
    private static Set<Token> getXMLKeywords(String name, String value) {
        name = extractSimpleFieldName(name);
        Set<Token> ret = new HashSet<Token>();
        StringTokenizer tok = new StringTokenizer(value, KEYWORD_DELIMITERS);
        while (tok.hasMoreTokens()) {
            // NOTE(review): getBytes() uses the platform default charset, so the
            // token bytes (and where truncation cuts) can vary by platform.
            // Left unchanged because existing token data may depend on it - confirm.
            byte[] token = tok.nextToken().getBytes();
            if (token.length < MIN_KEYWORD_LENGTH) {
                continue;
            }
            if (token.length > MAX_KEYWORD_LENGTH) {
                token = truncateArray(token, MAX_KEYWORD_LENGTH);
            }
            ret.add(new XMLKeywordToken(name, token));
        }
        return ret;
    }

    /** @return a new array holding the first {@code length} bytes of {@code array} */
    private static byte[] truncateArray(byte[] array, int length) {
        byte[] ret = new byte[length];
        System.arraycopy(array, 0, ret, 0, length);
        return ret;
    }

    /**
     * @return a new array holding {@code array1}, a single zero byte as
     *         separator, then {@code array2}
     */
    private static byte[] mergeArrays(byte[] array1, byte[] array2) {
        byte[] ret = new byte[array1.length + array2.length + 1];
        System.arraycopy(array1, 0, ret, 0, array1.length);
        // Already zero after allocation; assigned explicitly to mark the separator.
        ret[array1.length] = 0;
        System.arraycopy(array2, 0, ret, array1.length + 1, array2.length);
        return ret;
    }

    /**
     * Extracts the text between the last two {@code "__"} delimiters of a
     * canonical field name, e.g. {@code "audios__audio__title__"} yields
     * {@code "title"}.
     *
     * <p>Malformed names no longer throw or lose characters: with no delimiter
     * the whole string is returned, and with a single delimiter everything
     * before it is returned.
     *
     * @param canonicalField the canonical field name
     * @return the simple field name
     */
    private static String extractSimpleFieldName(String canonicalField) {
        int end = canonicalField.lastIndexOf(FIELD_DELIMITER);
        if (end < 0) {
            return canonicalField;
        }
        // Start the second search one full delimiter earlier so that it cannot
        // match a delimiter overlapping the last one (which gave start > end).
        int previous = canonicalField.lastIndexOf(FIELD_DELIMITER, end - FIELD_DELIMITER.length());
        int start = previous < 0 ? 0 : previous + FIELD_DELIMITER.length();
        return canonicalField.substring(start, end);
    }

    /**
     * Splits {@code str} into words and builds keyword tokens from them.
     *
     * <p>Each word of at least {@code MIN_KEYWORD_LENGTH} bytes becomes a token
     * (truncated to {@code MAX_KEYWORD_LENGTH} bytes if longer). In addition,
     * every word after the first produces a token for the pair made of the
     * previous word and itself, regardless of the length of either word.
     *
     * @param str the (already lower-cased) text to tokenize
     * @return a mutable set of keyword tokens
     */
    private static Set<Token> getKeywordTokens(String str) {
        Set<Token> ret = new HashSet<Token>();
        StringTokenizer tok = new StringTokenizer(str, KEYWORD_DELIMITERS);
        byte[] last = null;
        while (tok.hasMoreTokens()) {
            // NOTE(review): getBytes() uses the platform default charset, so the
            // token bytes (and where truncation cuts) can vary by platform.
            // Left unchanged because existing token data may depend on it - confirm.
            byte[] next = tok.nextToken().getBytes();
            if (next.length > MAX_KEYWORD_LENGTH) {
                next = truncateArray(next, MAX_KEYWORD_LENGTH);
            }
            // Short words are only used as half of a pair, never on their own.
            if (next.length >= MIN_KEYWORD_LENGTH) {
                ret.add(new KeywordToken(next));
            }
            if (last != null) {
                ret.add(new KeywordToken(mergeArrays(last, next)));
            }
            last = next;
        }
        return ret;
    }
}

