// Source: User talk:Ivan A. Krestinin/Redirect fixing bot

// Bot that looks at deleted and redirected items and, in every item that still
// links to one of them, replaces the stale key with the merge target in main
// values, qualifiers and references.
class CWDMergeFixBot : public CWDItemsBot
{
	private:
		// Overrides of the CWDItemsBot interface.
		// NOTE(review): how the base class drives these hooks is not visible here.
		virtual void ExecuteSelfTests();
		virtual std::vector<Wiki::QKey> GetItemsToProcess();
		// Sets *add_to_ignore_list to true when the item needs no further work
		// (no incoming links, an ordinary non-redirect item, or all links fixed).
		virtual void ProcessItem(Wiki::CWDItem& item, bool* add_to_ignore_list);

		// FixItem walks every known property of `item` and applies the three
		// helpers below to each value. `redirect` is the key being replaced,
		// `merge_target` is the key written instead, `value_id` is a label used
		// only in log output.
		void FixItem      (Wiki::CWDItem&  item,  const Wiki::QKey& merge_target, const Wiki::QKey& redirect);
		void FixMainSnak  (Wiki::CWDValue& value, const Wiki::QKey& merge_target, const Wiki::QKey& redirect, const std::string& value_id);
		void FixQualifiers(Wiki::CWDValue& value, const Wiki::QKey& merge_target, const Wiki::QKey& redirect, const std::string& value_id);
		void FixReferences(Wiki::CWDValue& value, const Wiki::QKey& merge_target, const Wiki::QKey& redirect, const std::string& value_id);

		// Sources of candidate items; their union is returned by GetItemsToProcess().
		std::set<Wiki::QKey> GetLastDeletedItems();
		std::set<Wiki::QKey> GetAllRedirects();

		// Parses the target key out of a deletion comment; throws CException
		// when the comment matches none of the known templates.
		Wiki::QKey GetMergeTargetFromComment(const std::string& comment);
		// Resolves the merge target of `merged_item`: the redirect target when
		// the page exists, otherwise the key parsed from the deletion log comment.
		Wiki::QKey GetTarget(Wiki::CWDItem& merged_item);

		// Both overloads report whether the item's page exists on the site.
		bool IsItemExists(const Wiki::QKey& item_key);
		bool IsItemExists(const Wiki::CWDItem& item);

		// All property keys; filled in GetItemsToProcess() and iterated in FixItem().
		std::vector<Wiki::PKey> m_all_props;
};


// Runs the base-class self tests and then checks GetMergeTargetFromComment()
// against three sample deletion comments.
void CWDMergeFixBot::ExecuteSelfTests()
{
	CWDItemsBot::ExecuteSelfTests();

	// Comment that matches a template exactly, including its trailing text.
	TEST( GetMergeTargetFromComment("Merged into [[Q3497695]], via The Game")           == QKey::FromString("Q3497695") );
	// Extra text after the template is accepted because matching is prefix-based.
	TEST( GetMergeTargetFromComment("Duplicate of / merged with: [[Q123]] same region") == QKey::FromString("Q123")     );
	// Same template with nothing after the closing brackets.
	TEST( GetMergeTargetFromComment("Duplicate of / merged with: [[Q123]]")             == QKey::FromString("Q123")     );
}


std::set<QKey> CWDMergeFixBot::GetLastDeletedItems()
{
	std::vector< std::pair<bool, std::string> > del_events = m_site.GetLastDeleteEvents("0", 4000);
	m_log.Message(int_to_str(del_events.size()) + " delete events");

	std::set<QKey> items;
	for (size_t i = 0; i < del_events.size(); i++)
	{
		std::string page_name = del_events[i].second;
		if (has_prefix(page_name, "Q"))
		{
			if (del_events[i].first)
				items.insert(QKey::FromString(page_name));
			else
				items.erase(QKey::FromString(page_name));
		}
	}

	return items;
}


std::set<QKey> CWDMergeFixBot::GetAllRedirects()
{
	std::vector<std::string> redirects = m_site.GetAllPages("Q", "0", no, 2000000);
	m_log.Message(int_to_str(redirects.size()) + " redirects");

	std::set<QKey> items;
	for (size_t i = 0; i < redirects.size(); i++)
		items.insert(QKey::FromString(redirects[i]));
	return items;
}


std::vector<QKey> CWDMergeFixBot::GetItemsToProcess()
{
	m_all_props = m_wikidata.GetAllPropertyKeys();


	std::set<QKey> todo_items;
	todo_items += GetLastDeletedItems();
	todo_items += GetAllRedirects();
	return std::vector<QKey>(todo_items.rbegin(), todo_items.rend());
}


// Extracts the merge target key from a deletion comment.
//
// The comment must start with one of the known templates: a fixed prefix that
// ends in "Q", a non-empty run of digits, then a fixed postfix. Anything after
// the postfix is ignored because the final check is prefix-based.
//
// Returns the key built from "Q" + digits.
// Throws CException when no template matches.
QKey CWDMergeFixBot::GetMergeTargetFromComment(const std::string& comment)
{
	static const struct CommentPattern
	{
		std::string prefix;   // text up to and including the "Q" of the target key
		std::string postfix;  // text that must directly follow the digits (may be empty)
	} patterns[] =
	{
		{ "Merged into [[Q", "]], via The Game" },
		{ "[[Wikidata:Requests for deletions|RfD]]: [[MediaWiki:Gadget-Merge.js|Merged]] with [[Q", "]]" },
		{ "[[Wikidata:Requests for deletions|RfD]]: Merged with [[Q", "]]" },
		{ "Duplicate of / merged with: [[Q", "]]" },
		{ "Duplicate of / merged with: Q", "" },
		{ "Merged with [[Q", "]] ([[MediaWiki:Gadget-Merge.js|merge.js]])" },
		{ "[[User:Ricordisamoa/StreamDelete|SD]]: merged with [[Q", "]]" },
		{ "Merged item in to \"Q", "\"" },
		{ "merged with [[Q", "]]" },
		// The misspelling below is part of the matched comment text; keep it as is.
		{ "Emtpy item: merged with [[Q", "]]" },
	};
	static const size_t pattern_count = sizeof(patterns) / sizeof(patterns[0]);

	for (size_t i = 0; i < pattern_count; i++)
	{
		const CommentPattern& pattern = patterns[i];

		if (! has_prefix(comment, pattern.prefix))
			continue;

		// Take the run of digits that immediately follows the prefix.
		const size_t digits_begin = pattern.prefix.size();
		size_t digits_end = digits_begin;
		while ((digits_end < comment.size()) && IsDigit(comment[digits_end]))
			digits_end++;
		const std::string digits = comment.substr(digits_begin, digits_end - digits_begin);

		// The postfix must follow the digits directly; trailing text is allowed.
		if ((! digits.empty()) && has_prefix(comment, pattern.prefix + digits + pattern.postfix))
			return QKey::FromString("Q" + digits);
	}

	throw CException("Unrecognized comment: " + comment + ".");
}


bool CWDMergeFixBot::IsItemExists(const QKey& item_key)
{
	return m_site.GetPage(item_key.ToString()).Exists();
}


bool CWDMergeFixBot::IsItemExists(const CWDItem& item)
{
	return IsItemExists(item.GetID());
}


// Determines where `merged_item` was merged to.
//
// Deleted page: the first entry of the page's log events must be a "delete"
// action; the target is parsed from that entry's comment.
// Existing page: the item's redirect target is returned, unless the latest
// revision is less than 24 hours old, in which case the redirect is left alone.
//
// Throws CException for an empty log, a non-delete first log action, a too
// recent revision, or (via GetMergeTargetFromComment) an unrecognized comment.
QKey CWDMergeFixBot::GetTarget(CWDItem& merged_item)
{
	CCurrentPage page = m_site.GetPage(merged_item.GetID().ToString());

	if (! page.Exists())
	{
		m_log.Message("  the item is deleted");

		std::vector<CLogEvent> log_events = page.GetLogEvents();
		if (log_events.empty())
			throw CException("Empty events list.");

		CLogEvent first_event = log_events.front();
		if (first_event.GetActionStr() != "delete")
			throw CException("The last action is " + first_event.GetActionStr() + ".");

		m_log.Message("  user:    " + first_event.GetUserName());
		m_log.Message("  comment: " + first_event.GetComment());

		return GetMergeTargetFromComment(first_event.GetComment());
	}

	m_log.Message("  the item exists");

	// Minimum age of the latest revision before the redirect is acted upon.
	const time_t min_age_sec = 24 * 60 * 60;

	std::vector<CPageRevision> revisions = page.GetRevisions(1, false);
	if (! revisions.empty())
	{
		CPageRevision& latest = revisions[0];
		m_log.Message("  " + latest.GetUserName() + " comment: " + latest.GetComment());

		time_t age_sec = GetSecElapsed(latest.GetTime());
		m_log.Message("  " + latest.GetISOTime() + " (" + SecToHumanFormat(age_sec) + " elapsed)");

		if (age_sec < min_age_sec)
			throw CException("Too early to fix this redirect.");
	}

	return merged_item.GetRedirectTarget();
}


// Re-targets every item that links to `item` so that it points to the merge
// target instead.
//
// *add_to_ignore_list is set to true when nothing links to the item, when the
// item is an existing non-redirect, or when all links were fixed. Any thrown
// exception leaves it untouched.
//
// Throws CException when the target does not exist or when links remain after
// fixing; exceptions from GetTarget() propagate as well.
void CWDMergeFixBot::ProcessItem(CWDItem& item, bool* add_to_ignore_list)
{
	// At most 1000 linking items are handled per call.
	const std::vector<QKey> referrers = m_wikidata.GetItemsWithItem(item.GetID(), 1000);
	if (referrers.empty())
	{
		*add_to_ignore_list = true;
		return;
	}
	m_log.Message("  " + int_to_str(referrers.size()) + " link(s) found");

	const bool is_ordinary_item = IsItemExists(item) && (! item.IsRedirect());
	if (is_ordinary_item)
	{
		m_log.Message("  existing non-redirect item");
		*add_to_ignore_list = true;
		return;
	}

	const QKey target = GetTarget(item);
	m_log.Message("  target:  " + target.ToString());

	if (! IsItemExists(target))
		throw CException("Target item " + target.ToString() + " does not exists.");
	if (item.GetID() < target)
		m_log.Message("  Warning: target has greater ID than source item.");

	m_log.Message("  name: " + GetWDItemName(m_wikidata.GetItem(target)));

	for (std::vector<QKey>::const_iterator link = referrers.begin(); link != referrers.end(); ++link)
	{
		CWDItem linking_item = m_wikidata.GetItem(*link);
		FixItem(linking_item, target, item.GetID());
	}

	// Query again (this time without a limit) to verify nothing still points at the item.
	const std::vector<QKey> remaining = m_wikidata.GetItemsWithItem(item.GetID());
	if (! remaining.empty())
		throw CException(int_to_str(remaining.size()) + " link(s) are not fixed");

	*add_to_ignore_list = true;
}


// Visits every value of every known property of `fixed_item` and lets the
// main-value, qualifier and reference fixers replace `redirect` with
// `merge_target`.
void CWDMergeFixBot::FixItem(CWDItem& fixed_item, const QKey& merge_target, const QKey& redirect)
{
	for (size_t prop_idx = 0; prop_idx < m_all_props.size(); prop_idx++)
	{
		CWDPropertyValues prop_values = fixed_item.GetProperty(m_all_props[prop_idx]);

		for (size_t value_idx = 0; value_idx < prop_values.GetCount(); value_idx++)
		{
			CWDValue current = prop_values.GetValue(value_idx);

			// Label used by the fixers in their log output: "<item>: <property>".
			const std::string item_label = fixed_item.GetID().ToString();
			const std::string prop_label = prop_values.GetPropertyKey().ToString();
			std::string value_id = format("%-10s: %s", item_label.c_str(), prop_label.c_str());

			FixMainSnak(current, merge_target, redirect, value_id);
			FixQualifiers(current, merge_target, redirect, value_id);
			FixReferences(current, merge_target, redirect, value_id);
		}
	}
}


// Replaces `redirect` with `merge_target` in the main value of a statement.
//
// Two cases are handled:
//   - an item value equal to `redirect` is replaced by `merge_target`;
//   - a quantity whose unit equals `redirect` gets `merge_target` as its unit.
// Every match is logged under `value_id`. In debug mode the match is logged
// but the value is not modified.
void CWDMergeFixBot::FixMainSnak(CWDValue& value, const QKey& merge_target, const QKey& redirect, const std::string& value_id)
{
	if ((value.GetType() == WDValueTypeItem) && (value.AsItemQKey() == redirect))
	{
		m_log.Message("    " + value_id);

		if (! IsDebugMode())
			value.Set(CWDValueData::FromItem(merge_target));
	}
	if ((value.GetType() == WDValueTypeQuantity) && value.AsQuantity().HasUnit() && (value.AsQuantity().GetUnit() == redirect))
	{
		CWDQuantity quantity = value.AsQuantity();
		// Log the quantity as it was before the unit is replaced.
		// (The message used to contain a stray literal "%s" placeholder.)
		m_log.Message("    " + value_id + ": " + quantity.ToJSON());

		quantity.SetUnit(merge_target);
		if (! IsDebugMode())
			value.Set(CWDValueData::FromQuantity(quantity));
	}
}


// Replaces `redirect` with `merge_target` in every item-typed qualifier of
// `value`. Each match is logged (statement first, then the qualifier); in
// debug mode the qualifier is left unchanged.
void CWDMergeFixBot::FixQualifiers(CWDValue& value, const QKey& merge_target, const QKey& redirect, const std::string& value_id)
{
	for (size_t q = 0; q < value.GetQualifierCount(); q++)
	{
		CWDQualifier qual = value.GetQualifier(q);

		// Only item-typed qualifiers that point at the redirect are of interest.
		if (! (qual.GetValue().GetType() == WDValueTypeItem))
			continue;
		if (! (qual.GetValue().AsItemQKey() == redirect))
			continue;

		m_log.Message("    " + value_id + ":" + value.ToString());
		m_log.Message("        qualifier " + qual.GetPropKey().ToString() + ":" + qual.GetValue().ToString());

		if (IsDebugMode())
			continue;

		qual.SetAsItem(m_wikidata.GetItem(merge_target));
	}
}


// Replaces `redirect` with `merge_target` inside the references of `value`.
//
// A reference is rewritten only if at least one of its item-typed entries
// equals `redirect`. The whole reference is then logged and rebuilt entry by
// entry, with matching entries swapped for `merge_target` and all others
// copied unchanged. In debug mode the rebuilt reference is not stored.
void CWDMergeFixBot::FixReferences(CWDValue& value, const QKey& merge_target, const QKey& redirect, const std::string& value_id)
{
	typedef std::vector< std::pair<PKey, CWDValueData> > RefEntries;

	for (size_t ref_idx = 0; ref_idx < value.GetReferenceCount(); ref_idx++)
	{
		CWDReference reference = value.GetReference(ref_idx);

		// Count the entries that point at the redirect.
		size_t hits = 0;
		for (size_t n = 0; n < reference.GetCount(); n++)
		{
			if ((reference.GetValue(n).GetType() == WDValueTypeItem) && (reference.GetValue(n).AsItemQKey() == redirect))
				hits++;
		}
		if (hits == 0)
			continue;

		m_log.Message("    " + value_id + ":" + value.ToString());
		m_log.Message("        reference:");
		for (size_t n = 0; n < reference.GetCount(); n++)
			m_log.Message("         " + reference.GetPropKey(n).ToString() + ":" + reference.GetValue(n).ToString());

		// Rebuild the reference, keeping the entry order.
		RefEntries replacement;
		for (size_t n = 0; n < reference.GetCount(); n++)
		{
			const bool is_stale = (reference.GetValue(n).GetType() == WDValueTypeItem) && (reference.GetValue(n).AsItemQKey() == redirect);
			if (is_stale)
				replacement.push_back(std::make_pair(reference.GetPropKey(n), CWDValueData::FromItem(merge_target)));
			else
				replacement.push_back(std::make_pair(reference.GetPropKey(n), reference.GetValue(n)));
		}

		if (! IsDebugMode())
			value.SetReference(ref_idx, replacement);
	}
}
// Return to the user page of "Ivan A. Krestinin/Redirect fixing bot".