MCPcopy
hub / github.com/elastic/go-elasticsearch / Run

Method Run

_examples/xkcdsearch/cmd/xkcd/commands/index.go:127–168  ·  view source on GitHub ↗

Run launches the crawler.

()

Source from the content-addressed store, hash-verified

125
126// Run launches the crawler.
127func (c *Crawler) Run() {
128 var (
129 wg sync.WaitGroup
130 nextURL string
131 )
132
133 rand.Seed(time.Now().Unix())
134
135 // Setup worker goroutines
136 for i := 1; i < c.workers+1; i++ {
137 c.log.Debug().Msgf("Creating worker %d", i)
138 wg.Add(1)
139 go func() {
140 defer wg.Done()
141
142 for url := range c.queue {
143 c.ProcessURL(url)
144 }
145 }()
146 }
147
148 // Start crawling
149 if doc := c.ProcessURL(c.StartURL); doc.ID != "" {
150 id, err := strconv.Atoi(doc.ID)
151 if err != nil {
152 c.log.Fatal().Err(err).Msg("Cannot get latest comic ID")
153 }
154 nextURL = fmt.Sprintf("https://xkcd.com/%d/info.0.json", id)
155 }
156
157 for {
158 if nextURL = c.NextURL(nextURL); nextURL == "" {
159 close(c.queue)
160 break
161 }
162
163 c.log.Debug().Str("URL", nextURL).Msg("Adding URL to queue")
164 c.queue <- nextURL
165 }
166
167 wg.Wait()
168}
169
170// ProcessURL parses the JSON data and stores document.
171func (c *Crawler) ProcessURL(url string) (doc xkcdsearch.Document) {

Callers 2

TestStore Function · 0.45
index.go File · 0.45

Calls 5

ProcessURL Method · 0.95
NextURL Method · 0.95
Add Method · 0.65
Seed Method · 0.45
Debug Method · 0.45

Tested by 1

TestStore Function · 0.36