A global log describes commits applied across multiple repositories after a corrupted migration. Each valid log line begins with id <commitId> timestamp <ts> followed by zero or more file-path / opaque-identifier pairs.
Two commits belong to the same repository when they can be connected through at least one matching (filePath, opaqueIdentifier) pair. If the same connected component would force one file path to have two different opaque identifiers, the input is ambiguous.
Malformed log lines must be discarded. They do not themselves make the input ambiguous.
After all log lines are processed, answer repository queries of the form "startTimestamp endTimestamp filePath opaqueIdentifier". For each query, return the commit IDs for the matching repository, filtered to the inclusive timestamp range, sorted by increasing timestamp and then increasing commit ID. Each response line should preserve the original trailing space behavior from the prompt.
If the input is ambiguous anywhere, return a single-line result containing AMBIGUOUS INPUT!.
Function Description
Complete processRepositoryQueries.
processRepositoryQueries has the following parameters:
String[] logLines: raw log lines, one string per candidate log entryString[] queries: raw query strings in the format"start end path opaqueId"Returns:String[]of query responses, or["AMBIGUOUS INPUT!"]when the logs are ambiguous.
Constraints
0 ≤ N ≤ 10⁷log lines and0 ≤ R ≤ 10⁵queries.- Valid log entries are space-delimited words with unique keys per line.
- The first two keys of a valid log line are always
idandtimestamp. - Commit IDs are unique across valid log lines.
- Commit IDs and timestamps fit in non-negative 64-bit integers.
- Malformed log entries should be discarded.
Example 1
Input:
logLines = ["id 0 timestamp 10 foo a bar b","id 1 timestamp 20 bar b baz c","id 2 timestamp 15 qux d"]
queries = ["0 30 bar b","0 12 foo a","0 30 qux d"]
Output:
["0 1 ","0 ","2 "]
Explanation:
Commits 0 and 1 belong to the same repository because they share bar b. Commit 2 is isolated. Each query filters the connected component by timestamp and preserves the required trailing space.
Example 2
Input:
logLines = ["id 0 timestamp 10 foo a bar b","id 1 timestamp 20 bar b baz c","id 2 timestamp 30 baz c foo x"]
queries = ["0 30 foo a"]
Output:
["AMBIGUOUS INPUT!"]
Explanation:
The three commits would be connected through shared files, but the connected component assigns two different opaque identifiers to foo. That makes the entire input ambiguous, so queries are ignored and only AMBIGUOUS INPUT! is returned.
解法
解析 + 并查集。先解析每个日志行:拆 token,校验 id <id> timestamp <ts> 前缀及偶数键值对,跳过格式错误行;将 (path, opaqueId) 视作"事件锚点",所有同一 commit 的锚点联合(用并查集合并到 commitId 代表元)。在合并过程中维护每个连通块的 path -> opaqueId 映射,若同 path 在同连通块中出现两个不同 id,立刻标记 AMBIGUOUS。最后对每个查询定位锚点对应的连通块,过滤时间戳并排序输出。时间复杂度 O((N+R)·α(N))。
from typing import List
def processRepositoryQueries(logLines: List[str], queries: List[str]) -> List[str]:
parent = {}
block_map = {} # root -> dict[path]=opaqueId
commit_info = {} # commitId -> (timestamp, root)
anchor_to_commit = {} # (path, opaqueId) -> commitId
def find(x):
while parent[x] != x:
parent[x] = parent[parent[x]]
x = parent[x]
return x
ambiguous = False
def union(a, b):
nonlocal ambiguous
ra, rb = find(a), find(b)
if ra == rb:
return
# merge block_map
ma, mb = block_map[ra], block_map[rb]
if len(ma) < len(mb):
ra, rb = rb, ra
ma, mb = mb, ma
for p, oid in mb.items():
if p in ma and ma[p] != oid:
ambiguous = True
return
ma[p] = oid
parent[rb] = ra
del block_map[rb]
for line in logLines:
parts = line.split(' ')
if len(parts) < 4 or parts[0] != 'id' or parts[2] != 'timestamp':
continue
try:
cid = int(parts[1])
ts = int(parts[3])
except ValueError:
continue
rest = parts[4:]
if len(rest) % 2 != 0:
continue
# check duplicate keys within this line
keys = rest[0::2]
if len(set(keys)) != len(keys):
continue
if cid in commit_info:
continue
parent[cid] = cid
block_map[cid] = {}
commit_info[cid] = (ts, cid)
for i in range(0, len(rest), 2):
p, oid = rest[i], rest[i + 1]
existing = block_map[cid].get(p)
if existing is not None and existing != oid:
ambiguous = True
block_map[cid][p] = oid
if (p, oid) in anchor_to_commit:
union(cid, anchor_to_commit[(p, oid)])
if ambiguous:
break
anchor_to_commit[(p, oid)] = cid
if ambiguous:
return ["AMBIGUOUS INPUT!"]
if ambiguous:
return ["AMBIGUOUS INPUT!"]
results = []
for q in queries:
qp = q.split(' ')
start, end, path, oid = int(qp[0]), int(qp[1]), qp[2], qp[3]
if (path, oid) not in anchor_to_commit:
results.append("")
continue
root = find(anchor_to_commit[(path, oid)])
matches = []
for cid, (ts, _) in commit_info.items():
if find(cid) == root and start <= ts <= end:
matches.append((ts, cid))
matches.sort()
results.append(" ".join(str(c) for _, c in matches) + " ")
return resultsclass Solution {
Map<Long, Long> parent = new HashMap<>();
Map<Long, Map<String, String>> blockMap = new HashMap<>();
Map<Long, long[]> commitInfo = new HashMap<>(); // cid -> [ts]
Map<String, Long> anchorToCommit = new HashMap<>();
boolean ambiguous = false;
long find(long x) {
while (parent.get(x) != x) {
parent.put(x, parent.get(parent.get(x)));
x = parent.get(x);
}
return x;
}
void union(long a, long b) {
long ra = find(a), rb = find(b);
if (ra == rb) return;
Map<String, String> ma = blockMap.get(ra), mb = blockMap.get(rb);
if (ma.size() < mb.size()) { long t = ra; ra = rb; rb = t; Map<String, String> tm = ma; ma = mb; mb = tm; }
for (Map.Entry<String, String> e : mb.entrySet()) {
String prev = ma.get(e.getKey());
if (prev != null && !prev.equals(e.getValue())) { ambiguous = true; return; }
ma.put(e.getKey(), e.getValue());
}
parent.put(rb, ra);
blockMap.remove(rb);
}
String[] processRepositoryQueries(String[] logLines, String[] queries) {
for (String line : logLines) {
String[] parts = line.split(" ");
if (parts.length < 4 || !"id".equals(parts[0]) || !"timestamp".equals(parts[2])) continue;
long cid, ts;
try { cid = Long.parseLong(parts[1]); ts = Long.parseLong(parts[3]); }
catch (NumberFormatException e) { continue; }
if ((parts.length - 4) % 2 != 0) continue;
Set<String> keys = new HashSet<>();
boolean dup = false;
for (int i = 4; i < parts.length; i += 2) if (!keys.add(parts[i])) { dup = true; break; }
if (dup) continue;
if (commitInfo.containsKey(cid)) continue;
parent.put(cid, cid);
blockMap.put(cid, new HashMap<>());
commitInfo.put(cid, new long[]{ts});
for (int i = 4; i < parts.length; i += 2) {
String p = parts[i], oid = parts[i + 1];
blockMap.get(cid).put(p, oid);
String key = p + " " + oid;
if (anchorToCommit.containsKey(key)) {
union(cid, anchorToCommit.get(key));
if (ambiguous) return new String[]{"AMBIGUOUS INPUT!"};
}
anchorToCommit.put(key, cid);
}
}
if (ambiguous) return new String[]{"AMBIGUOUS INPUT!"};
List<String> results = new ArrayList<>();
for (String q : queries) {
String[] qp = q.split(" ");
long start = Long.parseLong(qp[0]), end = Long.parseLong(qp[1]);
String key = qp[2] + " " + qp[3];
if (!anchorToCommit.containsKey(key)) { results.add(""); continue; }
long root = find(anchorToCommit.get(key));
List<long[]> matches = new ArrayList<>();
for (Map.Entry<Long, long[]> e : commitInfo.entrySet()) {
if (find(e.getKey()) == root && e.getValue()[0] >= start && e.getValue()[0] <= end) {
matches.add(new long[]{e.getValue()[0], e.getKey()});
}
}
matches.sort((a, b) -> a[0] != b[0] ? Long.compare(a[0], b[0]) : Long.compare(a[1], b[1]));
StringBuilder sb = new StringBuilder();
for (long[] m : matches) sb.append(m[1]).append(' ');
results.add(sb.toString());
}
return results.toArray(new String[0]);
}
}class Solution {
public:
unordered_map<long long, long long> parent;
unordered_map<long long, unordered_map<string, string>> blockMap;
unordered_map<long long, long long> commitTs;
unordered_map<string, long long> anchorToCommit;
bool ambiguous = false;
long long find(long long x) {
while (parent[x] != x) {
parent[x] = parent[parent[x]];
x = parent[x];
}
return x;
}
void unite(long long a, long long b) {
long long ra = find(a), rb = find(b);
if (ra == rb) return;
if (blockMap[ra].size() < blockMap[rb].size()) swap(ra, rb);
for (auto& [p, oid] : blockMap[rb]) {
auto it = blockMap[ra].find(p);
if (it != blockMap[ra].end() && it->second != oid) { ambiguous = true; return; }
blockMap[ra][p] = oid;
}
parent[rb] = ra;
blockMap.erase(rb);
}
vector<string> processRepositoryQueries(vector<string>& logLines, vector<string>& queries) {
for (auto& line : logLines) {
vector<string> parts;
stringstream ss(line);
string tok;
while (ss >> tok) parts.push_back(tok);
if (parts.size() < 4 || parts[0] != "id" || parts[2] != "timestamp") continue;
long long cid, ts;
try { cid = stoll(parts[1]); ts = stoll(parts[3]); }
catch (...) { continue; }
if ((parts.size() - 4) % 2 != 0) continue;
unordered_set<string> keys;
bool dup = false;
for (size_t i = 4; i < parts.size(); i += 2) if (!keys.insert(parts[i]).second) { dup = true; break; }
if (dup) continue;
if (commitTs.count(cid)) continue;
parent[cid] = cid;
commitTs[cid] = ts;
for (size_t i = 4; i < parts.size(); i += 2) {
string p = parts[i], oid = parts[i + 1];
blockMap[cid][p] = oid;
string key = p + " " + oid;
auto it = anchorToCommit.find(key);
if (it != anchorToCommit.end()) {
unite(cid, it->second);
if (ambiguous) return {"AMBIGUOUS INPUT!"};
}
anchorToCommit[key] = cid;
}
}
if (ambiguous) return {"AMBIGUOUS INPUT!"};
vector<string> results;
for (auto& q : queries) {
vector<string> qp;
stringstream ss(q);
string tok;
while (ss >> tok) qp.push_back(tok);
long long start = stoll(qp[0]), end = stoll(qp[1]);
string key = qp[2] + " " + qp[3];
if (!anchorToCommit.count(key)) { results.push_back(""); continue; }
long long root = find(anchorToCommit[key]);
vector<pair<long long, long long>> matches;
for (auto& [cid, ts] : commitTs) {
if (find(cid) == root && ts >= start && ts <= end) matches.push_back({ts, cid});
}
sort(matches.begin(), matches.end());
string out;
for (auto& [t, c] : matches) out += to_string(c) + " ";
results.push_back(out);
}
return results;
}
};