Take the 2-minute tour ×
Stack Overflow is a question and answer site for professional and enthusiast programmers. It's 100% free, no registration required.

I'm still new to threading. Let's say I have 50000 URLs and I want to get the contents of these URLs concurrently, processing 10 URLs at a time. Once one of these URLs finishes processing, the program should take another one from the queue, until it has processed all URLs in the list. How can I do that with C#? Here is the code I have so far:

 /// <summary>
 /// Bundle of per-request values that is passed as the state object to
 /// <c>BeginGetResponse</c> and to the timeout wait callback.
 /// </summary>
 class RequestState
    {
        /// <summary>The web request this state belongs to.</summary>
        public WebRequest Request;

        /// <summary>Arbitrary caller-supplied data carried along with the request.</summary>
        public object Data;

        /// <summary>
        /// The URL string of the request, kept so results can be matched up
        /// afterwards (database lookup, etc.).
        /// </summary>
        public string SiteUrl;

        /// <summary>Stores the three values unchanged; no validation is performed.</summary>
        /// <param name="request">Request to track.</param>
        /// <param name="data">Any data to associate with the request.</param>
        /// <param name="siteUrl">URL string the request was created from.</param>
        public RequestState(WebRequest request, object data, string siteUrl)
        {
            Request = request;
            Data = data;
            SiteUrl = siteUrl;
        }
    }

    /// <summary>
    /// Click event handler that starts processing of the selected URLs.
    /// </summary>
    /// <param name="sender">Event source (not used).</param>
    /// <param name="e">Event data (not used).</param>
    private void PROCESS_URLS_Click(object sender, EventArgs e)
    {
        // All of the work lives in process_URLs; this handler only triggers it.
        this.process_URLs();
    }

// Number of requests still counted as outstanding: process_URLs sets it to the
// number of selected items, and UpdateItem subtracts one per completed response
// and shows "finished" once it drops below 1.
private int ThreadsCount = 0;

  /// <summary>
  /// Starts an asynchronous GET request for every selected item in
  /// <c>URLS_LISTVIEW</c>. Each request completes in <c>UpdateItem</c> and is
  /// aborted by <c>ScanTimeoutCallback</c> if it has not signalled within 30 seconds.
  /// </summary>
  /// <remarks>
  /// A URL that cannot be turned into a request (malformed, unsupported scheme, ...)
  /// is reported to the user and skipped; it no longer aborts the rest of the batch,
  /// and it is removed from <c>ThreadsCount</c> so completion can still be detected.
  /// </remarks>
  private void process_URLs()
    {
        // Read the selection size once; it is both the loop bound and the
        // number of completions UpdateItem has to wait for.
        int selectedCount = URLS_LISTVIEW.SelectedItems.Count;
        ThreadsCount = selectedCount;

        for (int i = 0; i < selectedCount; i++)
        {
            // True once BeginGetResponse has accepted the request, i.e. once
            // UpdateItem is guaranteed to be called for it.
            bool started = false;

            try
            {
                string myURLs = URLS_LISTVIEW.SelectedItems[i].SubItems[0].Text.Trim();

                WebRequest request = WebRequest.Create(myURLs);
                request.Method = "GET";

                RequestState state = new RequestState(request, new object(), myURLs);
                IAsyncResult result = request.BeginGetResponse(new AsyncCallback(UpdateItem), state);
                started = true;

                // One-shot wait: fires when the request signals or after 30 s,
                // whichever comes first.
                ThreadPool.RegisterWaitForSingleObject(
                    result.AsyncWaitHandle,
                    new WaitOrTimerCallback(ScanTimeoutCallback),
                    state,
                    30 * 1000,
                    true);
            }
            catch (Exception es)
            {
                // Per-item isolation boundary: WebRequest.Create and
                // BeginGetResponse throw several unrelated exception types
                // (UriFormatException, NotSupportedException, WebException, ...),
                // and none of them should stop the remaining URLs.
                MessageBox.Show(es.Message);

                // If the request never started, no callback will ever account
                // for it, so take it out of the outstanding count here.
                if (!started && Interlocked.Decrement(ref ThreadsCount) < 1)
                {
                    MessageBox.Show("finished");
                }
            }
        }
    }




 /// <summary>
 /// Completion callback for <c>BeginGetResponse</c>: reads the whole response body
 /// for one request, then marks that request as done and shows "finished" once
 /// no requests remain outstanding.
 /// </summary>
 /// <param name="result">
 /// Async result whose <c>AsyncState</c> is the <c>RequestState</c> passed to
 /// <c>BeginGetResponse</c>.
 /// </param>
 /// <remarks>
 /// NOTE(review): this callback is invoked by the async request machinery, so it
 /// may run on a thread other than the UI thread and concurrently with other
 /// callbacks — hence the atomic decrement below.
 /// </remarks>
 private void UpdateItem(IAsyncResult result)
    {
        // grab the custom state object
        RequestState state = (RequestState)result.AsyncState;
        WebRequest request = state.Request;

        try
        {
            // WebResponse (rather than HttpWebResponse) so that non-HTTP schemes
            // do not fail with an InvalidCastException; the using blocks release
            // the connection even when reading throws.
            using (WebResponse response = request.EndGetResponse(result))
            using (Stream s = response.GetResponseStream())
            using (StreamReader readStream = new StreamReader(s))
            {
                //data grabbed
                string dataString = readStream.ReadToEnd();

                // process the response here (dataString belongs to state.SiteUrl)
            }
        }
        catch (WebException ex)
        {
            // Raised for failed requests and for requests aborted by
            // ScanTimeoutCallback; the request still counts as finished.
            System.Diagnostics.Debug.WriteLine(state.SiteUrl + ": " + ex.Message);
        }
        catch (IOException ex)
        {
            // The connection dropped while the body was being read.
            System.Diagnostics.Debug.WriteLine(state.SiteUrl + ": " + ex.Message);
        }
        finally
        {
            // Atomic decrement: callbacks run concurrently, and a plain
            // "ThreadsCount = ThreadsCount - 1" can lose updates. Using the value
            // returned by Decrement ensures exactly one callback sees the
            // count reach zero.
            if (Interlocked.Decrement(ref ThreadsCount) < 1)
            {
                //show message
                MessageBox.Show("finished");
            }
        }
    }





/// <summary>
/// Wait callback registered alongside each request: when the wait ended because
/// of a timeout, aborts the request held by the supplied state object.
/// </summary>
/// <param name="state">The <c>RequestState</c> registered with the wait; may be null.</param>
/// <param name="timedOut">True when the wait expired instead of being signalled.</param>
private static void ScanTimeoutCallback(object state, bool timedOut)
    {
        // Signalled in time: nothing to cancel.
        if (!timedOut)
        {
            return;
        }

        RequestState pending = (RequestState)state;
        if (pending == null)
        {
            return;
        }

        pending.Request.Abort();
    }

any ideas would be appreciated :)

kind regards,

share|improve this question

2 Answers 2

up vote 5 down vote accepted

Have a look at the TPL, there's an option to specify the maximum parallelism:

List<string> UriList = new List<string>();
...
// Cap the loop at 10 concurrently running iterations.
ParallelOptions throttle = new ParallelOptions();
throttle.MaxDegreeOfParallelism = 10;

// ProcessUrl is invoked once per entry of UriList.
Parallel.ForEach(UriList, throttle, uri => ProcessUrl(uri));

This would process at most 10 URLs in parallel, since we use the overload of Parallel.ForEach() that allows us to specify MaxDegreeOfParallelism.

Edit:

Here is a simple example that downloads the HTML from http://google.com 50 times in parallel (but with at most 10 threads running concurrently) and stores the results in an array:

// Build a list of 50 identical URLs to download.
List<string> UriList = new List<string>();
for (int i = 0; i < 50; i++)
{
    UriList.Add("http://google.com");
}

// One slot per URL; each iteration writes only its own slot, so no locking is needed.
string[] HtmlResults = new string[UriList.Count];

Parallel.ForEach(UriList,
                 new ParallelOptions() { MaxDegreeOfParallelism = 10 },
                 (url, loopState, index) =>
{
    // This overload passes (element, ParallelLoopState, long index); the index
    // is the element's position in UriList.
    // WebClient is disposable — release it per iteration instead of leaking 50 instances.
    using (WebClient wc = new WebClient())
    {
        HtmlResults[index] = wc.DownloadString(url);
    }
});

Not to create more confusion, but in your particular case PLINQ would also work very well, since there are no dependencies between the items to process and each URL is "transformed" into an actual result:

// Download every URL with at most 10 concurrent workers; AsOrdered keeps the
// results in the same order as UriList.
var htmlResultList = UriList.AsParallel()
                            .WithDegreeOfParallelism(10)
                            .AsOrdered()
                            .Select(url =>
                            {
                                // Dispose each WebClient once its download is done.
                                using (WebClient wc = new WebClient())
                                {
                                    return wc.DownloadString(url);
                                }
                            })
                            .ToList();
share|improve this answer
    
this means I need to write the whole thing from scratch? –  Robin Van Persi Mar 20 '11 at 2:06
    
you can still re-use the code that actually processes a Url, like the WebRequest etc. - but personally I would even simplify that by using a WebClient. Think of the long term benefit though - there is much, much less code to maintain. –  BrokenGlass Mar 20 '11 at 2:10
    
can you show me a sample? I'm really lost here.. :( –  Robin Van Persi Mar 20 '11 at 2:21
    
@ermac2014: Updated with an example –  BrokenGlass Mar 20 '11 at 2:35
    
thanks for this :).. but now when I run the code the whole UI freezes until it finishes grabbing all contents from the 50 URLs any ideas? –  Robin Van Persi Mar 20 '11 at 2:46

(This should be a comment under @BrokenGlass, but I can't post comments yet)

You can take a look at this article on how to use Parallel Processing and PLINQ to do what you're looking for. The entire set of articles that precede it have some good information as well.

Edit: If this is a standalone, spawn a new thread to run this portion in the background so it doesn't result in an unresponsive UI.

Edit 2: If you want, you can also throw your strings in a ConcurrentQueue so you can add items from the UI while looking them up.

share|improve this answer
    
you mean I need to use a new thread to do this or to use background worker? or it doesn't matter both will do same thing..? –  Robin Van Persi Mar 20 '11 at 2:59
    
@ermac2014 - You can do either. Just pushing the work off to some other thread not associated with your UI will keep it from hanging. Then, when the work is complete, just have some callback update the UI with the results. –  James D'Angelo Mar 20 '11 at 3:01
    
well now that makes sense :) I will let you know if I need further help. thank you for the tips really appreciated.. –  Robin Van Persi Mar 20 '11 at 3:04
    
No worries! Glad if I could be of assistance. –  James D'Angelo Mar 20 '11 at 3:08

Your Answer

 
discard

By posting your answer, you agree to the privacy policy and terms of service.

Not the answer you're looking for? Browse other questions tagged or ask your own question.