MCPcopy
hub / github.com/ArchiveBox/ArchiveBox / write_link_to_sql_index

Function write_link_to_sql_index

archivebox/index/sql.py:38–87  ·  view source on GitHub ↗
(link: Link)

Source from the content-addressed store, hash-verified

36
@enforce_types
def write_link_to_sql_index(link: Link):
    """Create or update the Snapshot row for *link* and record its history.

    The Snapshot is matched on ``link.url``. If a row with that URL already
    exists, its stored timestamp is kept; otherwise the link's timestamp is
    incremented by 1.0 until no other Snapshot uses it. ``link.tags`` is
    split on ``TAG_SEPARATOR_PATTERN``, stripped, de-duplicated in order and
    passed to ``snapshot.save_tags``. Each entry in ``link.history`` is then
    written as an ArchiveResult looked up by (snapshot, extractor, start_ts).

    Args:
        link: The link to persist. History entries may be plain dicts or
            objects exposing the same fields as attributes.

    Returns:
        The created or updated Snapshot.
    """
    from core.models import Snapshot, ArchiveResult

    # Keep only the link fields that Snapshot declares in Snapshot.keys.
    info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys}
    # Tags go through save_tags() below rather than through the defaults
    # dict; the default avoids a KeyError if 'tags' was filtered out above.
    info.pop('tags', None)

    stripped_tags = (
        tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')
    )
    # dict.fromkeys de-duplicates while preserving first-seen order. Blank
    # fragments are dropped so an untagged link yields [] instead of [''].
    tag_list = list(dict.fromkeys(tag for tag in stripped_tags if tag))

    try:
        # Reuse the timestamp already stored for this URL.
        info["timestamp"] = Snapshot.objects.get(url=link.url).timestamp
    except Snapshot.DoesNotExist:
        # New URL: bump the timestamp until no existing Snapshot uses it.
        while Snapshot.objects.filter(timestamp=info["timestamp"]).exists():
            info["timestamp"] = str(float(info["timestamp"]) + 1.0)

    snapshot, _ = Snapshot.objects.update_or_create(url=link.url, defaults=info)
    snapshot.save_tags(tag_list)

    for extractor, entries in link.history.items():
        for entry in entries:
            if isinstance(entry, dict):
                # Dict entries only insert a row when none matches the lookup.
                # NOTE(review): the object branch below uses update_or_create
                # instead; confirm the asymmetry is intentional.
                ArchiveResult.objects.get_or_create(
                    snapshot_id=snapshot.id,
                    extractor=extractor,
                    start_ts=parse_date(entry['start_ts']),
                    defaults={
                        'end_ts': parse_date(entry['end_ts']),
                        'cmd': entry['cmd'],
                        'output': entry['output'],
                        'cmd_version': entry.get('cmd_version') or 'unknown',
                        'pwd': entry['pwd'],
                        'status': entry['status'],
                    },
                )
            else:
                # Object entries overwrite the fields of a matching row.
                ArchiveResult.objects.update_or_create(
                    snapshot_id=snapshot.id,
                    extractor=extractor,
                    start_ts=parse_date(entry.start_ts),
                    defaults={
                        'end_ts': parse_date(entry.end_ts),
                        'cmd': entry.cmd,
                        'output': entry.output,
                        'cmd_version': entry.cmd_version or 'unknown',
                        'pwd': entry.pwd,
                        'status': entry.status,
                    },
                )

    return snapshot
88
89
90@enforce_types

Callers 3

write_sql_main_index · Function · 0.85
write_sql_link_details · Function · 0.85
archive_link · Function · 0.85

Calls 5

parse_date · Function · 0.85
filter · Method · 0.80
save_tags · Method · 0.80
_asdict · Method · 0.45
get · Method · 0.45

Tested by

no test coverage detected