Run launches the crawler.
()
| 125 | |
| 126 | // Run launches the crawler. |
| 127 | func (c *Crawler) Run() { |
| 128 | var ( |
| 129 | wg sync.WaitGroup |
| 130 | nextURL string |
| 131 | ) |
| 132 | |
| 133 | rand.Seed(time.Now().Unix()) |
| 134 | |
| 135 | // Setup worker goroutines |
| 136 | for i := 1; i < c.workers+1; i++ { |
| 137 | c.log.Debug().Msgf("Creating worker %d", i) |
| 138 | wg.Add(1) |
| 139 | go func() { |
| 140 | defer wg.Done() |
| 141 | |
| 142 | for url := range c.queue { |
| 143 | c.ProcessURL(url) |
| 144 | } |
| 145 | }() |
| 146 | } |
| 147 | |
| 148 | // Start crawling |
| 149 | if doc := c.ProcessURL(c.StartURL); doc.ID != "" { |
| 150 | id, err := strconv.Atoi(doc.ID) |
| 151 | if err != nil { |
| 152 | c.log.Fatal().Err(err).Msg("Cannot get latest comic ID") |
| 153 | } |
| 154 | nextURL = fmt.Sprintf("https://xkcd.com/%d/info.0.json", id) |
| 155 | } |
| 156 | |
| 157 | for { |
| 158 | if nextURL = c.NextURL(nextURL); nextURL == "" { |
| 159 | close(c.queue) |
| 160 | break |
| 161 | } |
| 162 | |
| 163 | c.log.Debug().Str("URL", nextURL).Msg("Adding URL to queue") |
| 164 | c.queue <- nextURL |
| 165 | } |
| 166 | |
| 167 | wg.Wait() |
| 168 | } |
| 169 | |
| 170 | // ProcessURL parses the JSON data and stores document. |
| 171 | func (c *Crawler) ProcessURL(url string) (doc xkcdsearch.Document) { |