// Bot that repairs statements still pointing at merged items.
// It gathers recently deleted item pages and redirect pages, looks up the
// items that still reference each of them, and rewrites those references
// (main snaks, qualifiers and reference snaks) to the merge target.
class CWDMergeFixBot : public CWDItemsBot
{
private:
// Hooks declared virtual here; presumably overrides of CWDItemsBot virtuals
// (the base class is not visible in this file section).
// Runs the base-class self tests, then checks GetMergeTargetFromComment().
virtual void ExecuteSelfTests();
// Caches all property keys in m_all_props and returns the deleted and
// redirect item keys, highest key first.
virtual std::vector<Wiki::QKey> GetItemsToProcess();
// Fixes every item that links to `item`; sets *add_to_ignore_list to true
// when nothing (more) has to be done for it.
virtual void ProcessItem(Wiki::CWDItem& item, bool* add_to_ignore_list);
// Walks every value of every known property of the given item and applies
// the three Fix* helpers below. (The definition names the first parameter
// `fixed_item`.)
void FixItem (Wiki::CWDItem& item, const Wiki::QKey& merge_target, const Wiki::QKey& redirect);
// Each helper replaces `redirect` by `merge_target` in one part of `value`;
// `value_id` is only used as a prefix for log messages.
void FixMainSnak (Wiki::CWDValue& value, const Wiki::QKey& merge_target, const Wiki::QKey& redirect, const std::string& value_id);
void FixQualifiers(Wiki::CWDValue& value, const Wiki::QKey& merge_target, const Wiki::QKey& redirect, const std::string& value_id);
void FixReferences(Wiki::CWDValue& value, const Wiki::QKey& merge_target, const Wiki::QKey& redirect, const std::string& value_id);
// Keys of "Q..." pages collected from the site's last delete events.
std::set<Wiki::QKey> GetLastDeletedItems();
// Keys of the "Q..." pages returned by the site's page listing (logged as redirects).
std::set<Wiki::QKey> GetAllRedirects();
// Extracts the merge target from a deletion comment; throws CException
// when the comment matches none of the known patterns.
Wiki::QKey GetMergeTargetFromComment(const std::string& comment);
// Returns the redirect target for an existing page, or the target parsed
// from the deletion comment for a deleted page.
Wiki::QKey GetTarget(Wiki::CWDItem& merged_item);
// True if the page for the given item exists on the site.
bool IsItemExists(const Wiki::QKey& item_key);
bool IsItemExists(const Wiki::CWDItem& item);
// All property keys; filled in GetItemsToProcess() and iterated in FixItem().
std::vector<Wiki::PKey> m_all_props;
};
// Runs the base-class self tests, then checks that GetMergeTargetFromComment()
// extracts the expected item key from sample merge/deletion comments.
void CWDMergeFixBot::ExecuteSelfTests()
{
CWDItemsBot::ExecuteSelfTests();
// "The Game" comment: prefix, key and the full postfix all present.
TEST( GetMergeTargetFromComment("Merged into [[Q3497695]], via The Game") == QKey::FromString("Q3497695") );
// Extra text after the closing "]]" must not prevent a match.
TEST( GetMergeTargetFromComment("Duplicate of / merged with: [[Q123]] same region") == QKey::FromString("Q123") );
// Same pattern with nothing after the postfix.
TEST( GetMergeTargetFromComment("Duplicate of / merged with: [[Q123]]") == QKey::FromString("Q123") );
}
std::set<QKey> CWDMergeFixBot::GetLastDeletedItems()
{
std::vector< std::pair<bool, std::string> > del_events = m_site.GetLastDeleteEvents("0", 4000);
m_log.Message(int_to_str(del_events.size()) + " delete events");
std::set<QKey> items;
for (size_t i = 0; i < del_events.size(); i++)
{
std::string page_name = del_events[i].second;
if (has_prefix(page_name, "Q"))
{
if (del_events[i].first)
items.insert(QKey::FromString(page_name));
else
items.erase(QKey::FromString(page_name));
}
}
return items;
}
std::set<QKey> CWDMergeFixBot::GetAllRedirects()
{
std::vector<std::string> redirects = m_site.GetAllPages("Q", "0", no, 2000000);
m_log.Message(int_to_str(redirects.size()) + " redirects");
std::set<QKey> items;
for (size_t i = 0; i < redirects.size(); i++)
items.insert(QKey::FromString(redirects[i]));
return items;
}
std::vector<QKey> CWDMergeFixBot::GetItemsToProcess()
{
m_all_props = m_wikidata.GetAllPropertyKeys();
std::set<QKey> todo_items;
todo_items += GetLastDeletedItems();
todo_items += GetAllRedirects();
return std::vector<QKey>(todo_items.rbegin(), todo_items.rend());
}
// Extracts the merge target from a deletion comment.
//
// The comment must start with one of the known prefixes, followed by at least
// one digit (the numeric part of the key) and then the pattern's postfix.
// Any text after the postfix is ignored. The first matching pattern wins.
//
// comment - the deletion log comment to parse.
// Returns the key "Q<digits>" taken from the comment.
// Throws CException if no pattern matches.
QKey CWDMergeFixBot::GetMergeTargetFromComment(const std::string& comment)
{
    static const struct CommentPattern
    {
        std::string prefix;   // text expected before the digits of the key
        std::string postfix;  // text expected right after the digits (may be empty)
    } patterns[] =
    {
        { "Merged into [[Q", "]], via The Game" },
        { "[[Wikidata:Requests for deletions|RfD]]: [[MediaWiki:Gadget-Merge.js|Merged]] with [[Q", "]]" },
        { "[[Wikidata:Requests for deletions|RfD]]: Merged with [[Q", "]]" },
        { "Duplicate of / merged with: [[Q", "]]" },
        { "Duplicate of / merged with: Q", "" },
        { "Merged with [[Q", "]] ([[MediaWiki:Gadget-Merge.js|merge.js]])" },
        { "[[User:Ricordisamoa/StreamDelete|SD]]: merged with [[Q", "]]" },
        { "Merged item in to \"Q", "\"" },
        { "merged with [[Q", "]]" },
        // The misspelling below is intentional: it mirrors comments as they were written.
        { "Emtpy item: merged with [[Q", "]]" },
    };
    const size_t pattern_count = sizeof(patterns) / sizeof(patterns[0]);

    for (size_t i = 0; i < pattern_count; i++)
    {
        const CommentPattern& pattern = patterns[i];
        if (! has_prefix(comment, pattern.prefix))
            continue;

        // Collect the run of digits that directly follows the prefix.
        std::string digits;
        for (size_t j = pattern.prefix.size(); (j < comment.size()) && IsDigit(comment[j]); j++)
            digits += comment[j];

        // A prefix without a number, or with the wrong postfix, is not a match;
        // keep looking, since another pattern may still fit.
        if (digits.empty())
            continue;
        if (! has_prefix(comment, pattern.prefix + digits + pattern.postfix))
            continue;

        return QKey::FromString("Q" + digits);
    }
    throw CException("Unrecognized comment: " + comment + ".");
}
// Reports whether the site has a page titled with the key's string form.
bool CWDMergeFixBot::IsItemExists(const QKey& item_key)
{
    const std::string page_title = item_key.ToString();
    return m_site.GetPage(page_title).Exists();
}
bool CWDMergeFixBot::IsItemExists(const CWDItem& item)
{
return IsItemExists(item.GetID());
}
// Determines the item that `merged_item` was merged into.
//
// Deleted page: the target is parsed from the comment of the first log event
// returned for the page, which must be a "delete" action.
// Existing page: the item's redirect target is returned, but only if the
// first returned revision is at least 24 hours old.
//
// Throws CException when the log is empty, when the first log event is not a
// deletion, when the comment is not recognized, or when the redirect is too
// recent.
QKey CWDMergeFixBot::GetTarget(CWDItem& merged_item)
{
    CCurrentPage item_page = m_site.GetPage(merged_item.GetID().ToString());

    if (! item_page.Exists())
    {
        m_log.Message(" the item is deleted");

        std::vector<CLogEvent> log_events = item_page.GetLogEvents();
        if (log_events.empty())
            throw CException("Empty events list.");

        CLogEvent& first_event = log_events.front();
        const std::string action = first_event.GetActionStr();
        if (action != "delete")
            throw CException("The last action is " + action + ".");

        m_log.Message(" user: " + first_event.GetUserName());
        m_log.Message(" comment: " + first_event.GetComment());
        return GetMergeTargetFromComment(first_event.GetComment());
    }

    m_log.Message(" the item exists");

    std::vector<CPageRevision> revisions = item_page.GetRevisions(1, false);
    if (! revisions.empty())
    {
        // Redirects younger than one day are left alone.
        const time_t min_age_sec = 24 * 60 * 60;

        CPageRevision& revision = revisions.front();
        m_log.Message(" " + revision.GetUserName() + " comment: " + revision.GetComment());

        time_t age_sec = GetSecElapsed(revision.GetTime());
        m_log.Message(" " + revision.GetISOTime() + " (" + SecToHumanFormat(age_sec) + " elapsed)");

        if (age_sec < min_age_sec)
            throw CException("Too early to fix this redirect.");
    }
    return merged_item.GetRedirectTarget();
}
// Redirects every link to a merged item towards its merge target.
//
// item               - the deleted or redirected item.
// add_to_ignore_list - set to true when the item needs no further work:
//                      nothing links to it, it is a live non-redirect item,
//                      or all links were fixed.
//
// Throws CException when the target does not exist or when links to the item
// remain after fixing; exceptions from GetTarget() propagate as well.
void CWDMergeFixBot::ProcessItem(CWDItem& item, bool* add_to_ignore_list)
{
    const QKey& source_key = item.GetID();

    // At most 1000 linking items are handled per run.
    std::vector<QKey> referrers = m_wikidata.GetItemsWithItem(source_key, 1000);
    if (referrers.empty())
    {
        *add_to_ignore_list = true;
        return;
    }
    m_log.Message(" " + int_to_str(referrers.size()) + " link(s) found");

    // A page that exists and is not a redirect is a regular item: nothing to fix.
    const bool is_live_item = IsItemExists(item) && (! item.IsRedirect());
    if (is_live_item)
    {
        m_log.Message(" existing non-redirect item");
        *add_to_ignore_list = true;
        return;
    }

    QKey merge_target = GetTarget(item);
    m_log.Message(" target: " + merge_target.ToString());
    if (! IsItemExists(merge_target))
        throw CException("Target item " + merge_target.ToString() + " does not exists.");
    if (source_key < merge_target)
        m_log.Message(" Warning: target has greater ID than source item.");
    m_log.Message(" name: " + GetWDItemName(m_wikidata.GetItem(merge_target)));

    for (std::vector<QKey>::const_iterator referrer = referrers.begin(); referrer != referrers.end(); ++referrer)
    {
        CWDItem linking_item = m_wikidata.GetItem(*referrer);
        FixItem(linking_item, merge_target, source_key);
    }

    // Query again (this time without a limit) to verify nothing still links here.
    std::vector<QKey> remaining = m_wikidata.GetItemsWithItem(source_key);
    if (! remaining.empty())
        throw CException(int_to_str(remaining.size()) + " link(s) are not fixed");

    *add_to_ignore_list = true;
}
// Applies the redirect fix to every value of every known property of
// `fixed_item`: main snak first, then qualifiers, then references.
// The list of properties comes from m_all_props.
void CWDMergeFixBot::FixItem(CWDItem& fixed_item, const QKey& merge_target, const QKey& redirect)
{
    const std::string item_id = fixed_item.GetID().ToString();

    for (size_t prop_index = 0; prop_index < m_all_props.size(); ++prop_index)
    {
        CWDPropertyValues values = fixed_item.GetProperty(m_all_props[prop_index]);

        for (size_t value_index = 0; value_index < values.GetCount(); ++value_index)
        {
            // NOTE(review): the value is taken by copy; this assumes CWDValue
            // writes changes through to the item - confirm.
            CWDValue value = values.GetValue(value_index);

            // "<item id padded to 10>: <property>" label used in log output.
            const std::string property_id = values.GetPropertyKey().ToString();
            std::string value_id = format("%-10s: %s", item_id.c_str(), property_id.c_str());

            FixMainSnak (value, merge_target, redirect, value_id);
            FixQualifiers(value, merge_target, redirect, value_id);
            FixReferences(value, merge_target, redirect, value_id);
        }
    }
}
// Replaces `redirect` by `merge_target` in the main snak of `value`.
//
// Two cases are handled:
//  - an item value equal to `redirect` becomes `merge_target`;
//  - a quantity whose unit is `redirect` gets `merge_target` as its unit.
// Every hit is logged with `value_id` as prefix; in debug mode the value is
// only logged, not modified.
void CWDMergeFixBot::FixMainSnak(CWDValue& value, const QKey& merge_target, const QKey& redirect, const std::string& value_id)
{
    if ((value.GetType() == WDValueTypeItem) && (value.AsItemQKey() == redirect))
    {
        m_log.Message(" " + value_id);
        if (! IsDebugMode())
            value.Set(CWDValueData::FromItem(merge_target));
    }

    if (value.GetType() == WDValueTypeQuantity)
    {
        // Fetch the quantity once; it is both inspected and modified below.
        CWDQuantity quantity = value.AsQuantity();
        if (quantity.HasUnit() && (quantity.GetUnit() == redirect))
        {
            // Log the quantity as it was before the unit is replaced.
            m_log.Message(" " + value_id + ": " + quantity.ToJSON());
            quantity.SetUnit(merge_target);
            if (! IsDebugMode())
                value.Set(CWDValueData::FromQuantity(quantity));
        }
    }
}
// Points every item-typed qualifier of `value` that refers to `redirect` at
// `merge_target` instead. Each hit is logged; in debug mode nothing is changed.
void CWDMergeFixBot::FixQualifiers(CWDValue& value, const QKey& merge_target, const QKey& redirect, const std::string& value_id)
{
    for (size_t index = 0; index < value.GetQualifierCount(); ++index)
    {
        // NOTE(review): the qualifier is taken by copy; this assumes
        // CWDQualifier::SetAsItem writes through to the statement - confirm.
        CWDQualifier qualifier = value.GetQualifier(index);

        // Skip anything that is not an item value equal to the redirect.
        if (qualifier.GetValue().GetType() != WDValueTypeItem)
            continue;
        if (! (qualifier.GetValue().AsItemQKey() == redirect))
            continue;

        m_log.Message(" " + value_id + ":" + value.ToString());
        m_log.Message(" qualifier " + qualifier.GetPropKey().ToString() + ":" + qualifier.GetValue().ToString());

        if (IsDebugMode())
            continue;
        qualifier.SetAsItem(m_wikidata.GetItem(merge_target));
    }
}
// Rewrites the references of `value` that mention `redirect`.
// A reference containing at least one item snak equal to `redirect` is logged
// and rebuilt with those snaks pointing at `merge_target`; all other snaks are
// copied unchanged. In debug mode the rebuilt reference is not written back.
void CWDMergeFixBot::FixReferences(CWDValue& value, const QKey& merge_target, const QKey& redirect, const std::string& value_id)
{
    for (size_t ref_index = 0; ref_index < value.GetReferenceCount(); ++ref_index)
    {
        CWDReference reference = value.GetReference(ref_index);
        const size_t snak_count = reference.GetCount();

        // Does any snak of this reference point at the redirect?
        bool mentions_redirect = false;
        for (size_t k = 0; (k < snak_count) && ! mentions_redirect; ++k)
        {
            mentions_redirect = (reference.GetValue(k).GetType() == WDValueTypeItem)
                             && (reference.GetValue(k).AsItemQKey() == redirect);
        }
        if (! mentions_redirect)
            continue;

        // Log the reference as it is before the change.
        m_log.Message(" " + value_id + ":" + value.ToString());
        m_log.Message(" reference:");
        for (size_t k = 0; k < snak_count; ++k)
            m_log.Message(" " + reference.GetPropKey(k).ToString() + ":" + reference.GetValue(k).ToString());

        // Rebuild the snak list, swapping the redirect for the merge target.
        std::vector< std::pair<PKey, CWDValueData> > rebuilt;
        rebuilt.reserve(snak_count);
        for (size_t k = 0; k < snak_count; ++k)
        {
            const bool is_redirect_snak = (reference.GetValue(k).GetType() == WDValueTypeItem)
                                       && (reference.GetValue(k).AsItemQKey() == redirect);
            if (is_redirect_snak)
                rebuilt.push_back(std::make_pair(reference.GetPropKey(k), CWDValueData::FromItem(merge_target)));
            else
                rebuilt.push_back(std::make_pair(reference.GetPropKey(k), reference.GetValue(k)));
        }

        if (! IsDebugMode())
            value.SetReference(ref_index, rebuilt);
    }
}