<?php
namespace DevOwl\SearchEnginePostType;

use DevOwl\Multilingual\TranslatePress;
use DevOwl\SearchEnginePostType\splitter\HtmlSplitter;
use WP_Post;
use WP_Query;
use WP_Term;

/**
 * Reads the published posts of a post type page by page, converts each post
 * into one or more index records and pushes them to the search engine
 * provider configured on the given `SearchEnginePostType`.
 *
 * When a multilingual compatibility layer is active, the whole process is
 * repeated once per language.
 */
class IndexRecords {
    use UtilsProvider;

    /**
     * The only `WP_Post` fields which are copied into an index record.
     */
    const KEEP_KEYS_FOR_RECORD = ['ID', 'post_content', 'post_title'];

    /**
     * Number of posts fetched (and sent to the provider) per query page.
     */
    const POSTS_PER_PAGE = 100;

    /**
     * The post type instance.
     *
     * @var SearchEnginePostType
     */
    private $searchEnginePostType;

    /**
     * C'tor.
     *
     * @param SearchEnginePostType $searchEnginePostType
     * @codeCoverageIgnore
     */
    public function __construct($searchEnginePostType) {
        $this->searchEnginePostType = $searchEnginePostType;
    }

    /**
     * Start the process of indexing with multilingual functionality.
     *
     * If a language compatibility layer is available and active, `fetchAndIndex()`
     * runs once per language; otherwise it runs a single time without locale.
     *
     * @param boolean $clear If `true`, the provider index is cleared before indexing
     * @param int[] $post__in If given, only that posts will be updated
     */
    public function start($clear = true, $post__in = null) {
        SearchEnginePostType::log(
            sprintf('Start indexing post type %s...', $this->getSearchEnginePostType()->getArg('post_type'))
        );

        $compLanguage = $this->getSearchEnginePostType()->getCompLanguage();
        if ($compLanguage !== null && $compLanguage->isActive()) {
            $compLanguage->iterateAllLanguagesContext(function ($locale) use ($clear, $post__in) {
                $this->fetchAndIndex($locale, $clear, $post__in);
            });
        } else {
            $this->fetchAndIndex(null, $clear, $post__in);
        }
    }

    /**
     * Fetch posts and index them.
     *
     * Published posts are queried in pages of `POSTS_PER_PAGE` until a page
     * comes back empty. Each page is converted to records and handed to the
     * provider in one `putIndex()` call.
     *
     * @param string|null $locale Passed as `lang` query argument; `null` when no language context is used
     * @param boolean $clear If `true`, the provider index is cleared first
     * @param int[] $post__in If given, only that posts will be updated
     */
    public function fetchAndIndex($locale = null, $clear = true, $post__in = null) {
        $searchEnginePostType = $this->getSearchEnginePostType();

        if ($locale !== null) {
            SearchEnginePostType::log(sprintf('Fetching content for locale %s...', $locale));
        } else {
            SearchEnginePostType::log('Fetching content...');
        }

        // Make sure to clear all records
        $provider = $searchEnginePostType->getProvider();

        if ($clear) {
            $provider->clearIndex();
        }

        $paged = 1;
        $count = 0;

        do {
            if ($paged > 1) {
                SearchEnginePostType::log(sprintf('Fetching page %d...', $paged));
            }

            $args = [
                'post_type' => $searchEnginePostType->getArg('post_type'),
                'lang' => $locale,
                'posts_per_page' => self::POSTS_PER_PAGE,
                'paged' => $paged,
                'post_status' => 'publish',
            ];

            if ($post__in !== null) {
                // NOTE(review): WP_Query ignores an empty `post__in` array and then matches
                // all posts - confirm callers never pass `[]` when they mean "nothing".
                $args['post__in'] = $this->preparePostInParameter($post__in);
            }

            $query = new WP_Query($args);

            if (!$query->have_posts()) {
                if ($paged > 1) {
                    SearchEnginePostType::log('No more posts found for this page!');
                }
                break;
            }

            // Prepare records: each post yields a list of records, flatten them to one list
            $results = self::array_flatten(array_map([$this, 'preparePost'], $query->get_posts()));
            SearchEnginePostType::log(sprintf('Fetched %d posts, index each record...', count($results)));
            $provider->putIndex($results);

            $count += count($results);
            ++$paged;
        } while (true);

        SearchEnginePostType::log(sprintf('Successfully indexed %d records!', $count));
    }

    /**
     * Prepare `post__in` and translate it if a sync multilingual plugin is active like WPML or PolyLang.
     *
     * @param int[] $post__in
     * @return int[] The IDs mapped through the language layer (duplicates removed), or
     *               the unchanged input when no language layer is active
     */
    public function preparePostInParameter($post__in) {
        $searchEnginePostType = $this->getSearchEnginePostType();
        $compLanguage = $searchEnginePostType->getCompLanguage();

        if ($compLanguage === null || !$compLanguage->isActive()) {
            return $post__in;
        }

        $postType = $searchEnginePostType->getArg('post_type');

        return array_unique(
            array_map(function ($id) use ($compLanguage, $postType) {
                // When a multilingual plugin is active, get the ID of the current language ID
                return $compLanguage->getCurrentPostId($id, $postType);
            }, $post__in)
        );
    }

    /**
     * Map a post to array, translate it and finally serialize it.
     *
     * @param WP_Post $post
     * @return array[] One or more records for the given post
     */
    public function preparePost($post) {
        $result = $this->mapPost($post);
        $result = $this->translatePost($result);
        return $this->serializePost($result, $post);
    }

    /**
     * Translate a post array.
     *
     * @param array $postArray
     * @return array The translated array, or the unchanged input when no language layer is active
     */
    protected function translatePost($postArray) {
        $compLanguage = $this->getSearchEnginePostType()->getCompLanguage();

        if ($compLanguage !== null && $compLanguage->isActive()) {
            $postArray = $compLanguage->translateArray($postArray);
        }
        return $postArray;
    }

    /**
     * Map a post to a record for the index.
     *
     * - Remove unnecessary elements
     * - E.g. in Algolia we need to transform the `ID` attribute to the object ID
     * - Map for the given search engine
     *
     * @param WP_Post $post
     * @return array
     */
    protected function mapPost($post) {
        $searchEnginePostType = $this->getSearchEnginePostType();
        $provider = $searchEnginePostType->getProvider();

        // Keep only the whitelisted post fields
        $postArray = array_intersect_key($post->to_array(), array_flip(self::KEEP_KEYS_FOR_RECORD));

        // Prepare HTML
        $postArray['post_content'] = HtmlSplitter::get_sanitized_content($postArray['post_content']);

        // Taxonomies
        foreach (get_object_taxonomies($post) as $taxonomy) {
            $terms = wp_get_post_terms($post->ID, $taxonomy);
            if (is_wp_error($terms)) {
                // `wp_get_post_terms()` can return a `WP_Error`; index the post without terms then
                $terms = [];
            }
            $postArray[$taxonomy] = array_map([$this, 'mapTerm'], $terms);

            // Calculate first level taxonomies
            $firstLevelTaxonomies = [];
            foreach ($postArray[$taxonomy] as $taxObject) {
                $parents = get_term_parents_list($taxObject['id'], $taxonomy, [
                    'separator' => '/',
                    'link' => false,
                    'format' => 'name',
                ]);

                // A non-string result (error) is skipped; the first path segment is the root term
                if (is_string($parents)) {
                    $firstLevelTaxonomies[] = explode('/', $parents)[0];
                }
            }

            $postArray[$taxonomy . '_first'] = array_values(array_unique($firstLevelTaxonomies));
        }

        // Custom metadata
        $customFields = $searchEnginePostType->getArg('custom_fields');
        if (is_array($customFields) && count($customFields) > 0) {
            $meta = get_post_meta($post->ID);
            $removeKeys = array_values(array_diff(array_keys($meta), $customFields));
            foreach ($removeKeys as $key) {
                unset($meta[$key]);
            }
            $postArray['meta'] = $meta;

            // Check if a meta modifier exists and apply it. The return value is ignored,
            // so the callback presumably receives `$postArray` by reference.
            $modifyMeta = $searchEnginePostType->getArg('modifyMeta');
            if (is_callable($modifyMeta)) {
                $modifyMeta($postArray, $post, $searchEnginePostType);
            }
        }

        // Create object ID
        $objectIdKey = $provider->getObjectIdKey();
        if ($objectIdKey !== 'ID') {
            $postArray[$objectIdKey] = $post->ID;
            unset($postArray['ID']);
        }

        // Provider specific mapping; return value is ignored (presumably by-reference, see above)
        $provider->mapPost($post, $postArray);

        return $postArray;
    }

    /**
     * Map a term to a valid record.
     *
     * @param WP_Term $term
     * @return array Array with the keys `id`, `slug` and `name`
     */
    public function mapTerm($term) {
        return [
            'id' => $term->term_id,
            'slug' => $term->slug,
            'name' => $term->name,
        ];
    }

    /**
     * Strips tags and serialize the post array. It additionally splits it
     * into multiple rows if the post is too heavy. It also returns multiple records
     * as we always try to split the content if the provider supports this.
     *
     * @param array $postArray
     * @param WP_Post $post
     * @return array[]
     */
    protected function serializePost($postArray, $post) {
        $searchEnginePostType = $this->getSearchEnginePostType();
        $provider = $searchEnginePostType->getProvider();
        $compLanguage = $searchEnginePostType->getCompLanguage();

        // Create permalink
        $permalink = get_permalink($post);
        if ($compLanguage !== null && $compLanguage->isActive()) {
            $permalink = $compLanguage->getPermalink($permalink, $compLanguage->getCurrentLanguageFallback());
        }
        $postArray['permalink'] = $permalink;

        // Check if we need to modify the single records (before duplication)
        $modifyRecord = $searchEnginePostType->getArg('modifyRecord');
        if (is_callable($modifyRecord)) {
            $modifyRecord($postArray, $post, $searchEnginePostType, $provider);
        }

        $post_content = $postArray['post_content'];
        $records = [$postArray];

        // Check if need to split the content (only when the provider exposes a distinct key)
        $distinctKey = $provider->getDistinctKey();
        $objectIdKey = $provider->getObjectIdKey();
        if ($distinctKey) {
            $htmlSplitter = new HtmlSplitter();
            $splitted = $htmlSplitter->split($post_content);
            $distinctId = $postArray[$objectIdKey];

            // Check if content could be splitted
            if (count($splitted) > 0) {
                // Merge the common attributes into each split and add a unique `objectID`
                foreach ($splitted as $key => $split) {
                    $splitted[$key] = array_merge($postArray, $split, [
                        $objectIdKey => $distinctId . '-' . $key,
                    ]);
                    $splitted[$key][$distinctKey] = $distinctId;
                }
                $records = $splitted;
            } else {
                $records[0][$distinctKey] = $distinctId;
            }
        }

        // Calculated fields: first line of the first record and all non-empty h2 subtitles
        $introduction = explode("\n", $records[0]['post_content'])[0];
        $toc = [];
        foreach ($records as $record) {
            if (!empty($record['subtitle_h2'])) {
                $toc[] = $record['subtitle_h2'];
            }
        }

        // Apply common calculated attributes
        foreach ($records as &$record) {
            $record['introduction'] = $introduction;
            $record['toc'] = $toc;
        }
        // Break the reference so the last element is no longer aliased by `$record`
        unset($record);

        // Check if we need to modify the records (e.g. add more records with deduplication)
        $modifyRecords = $searchEnginePostType->getArg('modifyRecords');
        if (is_callable($modifyRecords)) {
            $modifyRecords($records, $postArray, $post, $searchEnginePostType, $provider);
        }

        return $records;
    }

    /**
     * Getter.
     *
     * @return SearchEnginePostType
     * @codeCoverageIgnore
     */
    public function getSearchEnginePostType() {
        return $this->searchEnginePostType;
    }

    /**
     * Flatten an array.
     *
     * Array values are merged into the result (one level, or all levels when
     * `$recursive` is `true`); non-array values keep their original key.
     *
     * @param array $array
     * @param boolean $recursive If `true`, nested arrays are flattened at every depth
     * @return array
     */
    public static function array_flatten($array, $recursive = false) {
        $return = [];
        foreach ($array as $key => $value) {
            if (is_array($value)) {
                // Recurse into the nested value itself (not the outer array) to avoid endless recursion
                $return = array_merge($return, $recursive ? self::array_flatten($value, $recursive) : $value);
            } else {
                $return[$key] = $value;
            }
        }

        return $return;
    }
}
