MCPcopy
hub / github.com/ArchiveBox/ArchiveBox / save_git

Function save_git

archivebox/extractors/git.py:50–90  ·  view source on GitHub ↗

download full site using git

(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)

Source from the content-addressed store, hash-verified

48
@enforce_types
def save_git(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
    """Download the full site using ``git clone``.

    Runs ``git clone`` on ``link.url`` (with its query string and fragment
    removed) using the ``git`` sub-directory of ``out_dir`` as the working
    directory.

    Args:
        link: The link whose URL is cloned.
        out_dir: Directory that holds the ``git`` output folder; falls back
            to ``link.link_dir`` when not given.
        timeout: Budget handed to the progress timer; the subprocess itself
            is run with ``timeout + 1``.

    Returns:
        An ArchiveResult with ``output='git'`` and ``status='succeeded'``
        when the clone worked (or was skipped, see below), otherwise with
        ``output`` set to the caught exception and ``status='failed'``.
    """

    out_dir = out_dir or Path(link.link_dir)
    output: ArchiveOutput = 'git'
    output_path = out_dir / output
    output_path.mkdir(exist_ok=True)
    cmd = [
        GIT_BINARY,
        'clone',
        *GIT_ARGS,
        # Only disable certificate verification when the config asks for it.
        *([] if CHECK_SSL_VALIDITY else ['-c', 'http.sslVerify=false']),
        without_query(without_fragment(link.url)),
    ]
    status = 'succeeded'
    timer = TimedProgress(timeout, prefix=' ')
    try:
        result = run(cmd, cwd=str(output_path), timeout=timeout + 1)
        if result.returncode == 128:
            # ignore failed re-download when the folder already exists
            # NOTE(review): git also exits with 128 for other fatal errors,
            # so those are ignored here as well -- confirm this is intended.
            pass
        elif result.returncode != 0:
            # Any other non-zero code is a failure. This includes negative
            # codes, which subprocess reports when the child was killed by a
            # signal; the previous `> 0` check let those pass as success.
            hints = 'Got git response code: {}.'.format(result.returncode)
            raise ArchiveError('Failed to save git clone', hints)

        chmod_file(output, cwd=str(out_dir))

    except Exception as err:
        # Extractor boundary: report the failure in the result rather than
        # propagating it to the caller.
        status = 'failed'
        output = err
    finally:
        # Always stop the progress timer, on success and on failure.
        timer.end()

    return ArchiveResult(
        cmd=cmd,
        pwd=str(out_dir),
        cmd_version=GIT_VERSION,
        output=output,
        status=status,
        **timer.stats,
    )

Callers

nothing calls this directly

Calls 6

endMethod · 0.95
TimedProgressClass · 0.85
ArchiveErrorClass · 0.85
chmod_fileFunction · 0.85
runFunction · 0.50
ArchiveResultClass · 0.50

Tested by

no test coverage detected