如何在超时后取消任务等待

如何在超时后取消任务等待,第1张

如何在超时后取消任务等待

已更新:可以在 GitHub 上找到基于 WebBrowser 的控制台 Web scraper 的最新版本。

已更新:添加了 WebBrowser 对象池,以支持多个并行下载。

您是否有示例说明如何在控制台应用程序中执行此操作?另外,我也不认为 webBrowser 可以作为类变量,因为我是在并行 foreach 循环中运行整个过程的,需要迭代数千个 URL。

以下是一个基于 WebBrowser 的、大体通用的 Web scraper 实现,可作为控制台应用程序运行。它整合了我以前与 WebBrowser 相关的一些工作,包括问题中引用的代码:

  • 捕获不透明的网页图像

  • 加载具有动态AJAX内容的页面

  • 为 WebBrowser 创建 STA 消息循环线程

  • 依次加载一组URL

  • 使用 WebBrowser 打印一组 URL

  • 网页UI自动化

几点:

  • 可重用的 MessageLoopApartment 类用于启动并运行一个带有自己消息泵的 WinForms STA 线程。如下所示,它可以在控制台应用程序中使用。此类公开了一个 TPL 任务调度器(通过 TaskScheduler.FromCurrentSynchronizationContext 获得),以及一组使用该任务调度器的 Task.Factory.StartNew 包装方法。

  • async/await 是在这个单独的 STA 线程上运行 WebBrowser 导航任务的绝佳工具。这样,WebBrowser 对象就可以在该线程上被创建、导航和销毁。不过,MessageLoopApartment 并不是专门与 WebBrowser 绑定的。

  • 使用浏览器功能控制(Browser Feature Control)启用 HTML5 渲染很重要,因为默认情况下 WebBrowser 对象在 IE7 仿真模式下运行。这就是下面的 SetFeatureBrowserEmulation 所做的事情。

  • 并不总是能够以 100% 的把握确定网页何时完成渲染。有些页面非常复杂,并且会持续进行 AJAX 更新。但是,通过先处理 DocumentCompleted 事件,然后轮询页面当前的 HTML 快照以检测变化,并检查 WebBrowser.IsBusy 属性,我们可以做到非常接近。这就是下面的 NavigateAsync 所做的事情。

  • 在上述逻辑之上还有一层超时逻辑,用于应对页面渲染永无休止的情况(请注意 CancellationTokenSource 和 CreateLinkedTokenSource 的用法)。

    using Microsoft.Win32;
    using System;
    using System.Threading;
    using System.Threading.Tasks;
    using System.Windows.Forms;

    namespace Console_22239357
    {
        /// <summary>
        /// Console entry point: downloads the rendered HTML of a fixed list of URLs
        /// using a WinForms <see cref="WebBrowser"/> hosted on a dedicated STA thread.
        /// </summary>
        class Program
        {
            // by Noseratio - https://stackoverflow.com/a/22262976/1768303

            /// <summary>
            /// Main logic: navigates to each URL in turn and prints the resulting HTML.
            /// </summary>
            /// <param name="urls">The URLs to load, processed sequentially.</param>
            /// <param name="token">Cancels the whole run when signalled.</param>
            /// <returns>A task that completes when every URL has been processed.</returns>
            static async Task ScrapSitesAsync(string[] urls, CancellationToken token)
            {
                using (var apartment = new MessageLoopApartment())
                {
                    // create WebBrowser inside MessageLoopApartment
                    var webBrowser = apartment.Invoke(() => new WebBrowser());
                    try
                    {
                        foreach (var url in urls)
                        {
                            Console.WriteLine("URL:\n" + url);

                            // cancel in 30s or when the main token is signalled;
                            // the linked source is disposed once this URL is done
                            using (var navigationCts = CancellationTokenSource.CreateLinkedTokenSource(token))
                            {
                                navigationCts.CancelAfter((int)TimeSpan.FromSeconds(30).TotalMilliseconds);
                                var navigationToken = navigationCts.Token;

                                // run the navigation task inside MessageLoopApartment
                                string html = await apartment.Run(
                                    () => webBrowser.NavigateAsync(url, navigationToken),
                                    navigationToken);

                                Console.WriteLine("HTML:\n" + html);
                            }
                        }
                    }
                    finally
                    {
                        // dispose of WebBrowser inside MessageLoopApartment
                        apartment.Invoke(() => webBrowser.Dispose());
                    }
                }
            }

            /// <summary>
            /// Entry point: enables the browser emulation feature, then runs the scraper
            /// with an overall 3-minute timeout. On failure prints the innermost
            /// exception message and exits with code -1.
            /// </summary>
            static void Main(string[] args)
            {
                try
                {
                    WebBrowserExt.SetFeatureBrowserEmulation(); // enable HTML5

                    using (var cts = new CancellationTokenSource((int)TimeSpan.FromMinutes(3).TotalMilliseconds))
                    {
                        var task = ScrapSitesAsync(
                            new[] { "http://example.com", "http://example.org", "http://example.net" },
                            cts.Token);

                        // blocking here is intentional: Main is synchronous and the
                        // WebBrowser work runs on the apartment's own STA thread
                        task.Wait();

                        Console.WriteLine("Press Enter to exit...");
                        Console.ReadLine();
                    }
                }
                catch (Exception ex)
                {
                    // unwrap nested AggregateExceptions to reach the root cause
                    while (ex is AggregateException && ex.InnerException != null)
                        ex = ex.InnerException;
                    Console.WriteLine(ex.Message);
                    Environment.Exit(-1);
                }
            }
        }

        /// <summary>
        /// WebBrowserExt - WebBrowser extensions
        /// by Noseratio - https://stackoverflow.com/a/22262976/1768303
        /// </summary>
        public static class WebBrowserExt
        {
            // delay, in milliseconds, between two HTML snapshots in the polling loop
            const int POLL_DELAY = 500;

            /// <summary>
            /// Navigates to <paramref name="url"/>, waits for DocumentCompleted, then polls
            /// the document's outer HTML until two consecutive snapshots are equal while
            /// the browser is not busy.
            /// </summary>
            /// <param name="webBrowser">The browser to navigate.</param>
            /// <param name="url">The address to load.</param>
            /// <param name="token">Cancels the wait for DocumentCompleted and the polling loop.</param>
            /// <returns>The last HTML snapshot of the root element.</returns>
            /// <exception cref="OperationCanceledException">
            /// Thrown (possibly as the derived TaskCanceledException) when
            /// <paramref name="token"/> is signalled before the page settles.
            /// </exception>
            public static async Task<string> NavigateAsync(this WebBrowser webBrowser, string url, CancellationToken token)
            {
                // navigate and await DocumentCompleted
                var tcs = new TaskCompletionSource<bool>();
                WebBrowserDocumentCompletedEventHandler handler = (s, arg) =>
                    tcs.TrySetResult(true);

                using (token.Register(() => tcs.TrySetCanceled(), useSynchronizationContext: true))
                {
                    webBrowser.DocumentCompleted += handler;
                    try
                    {
                        webBrowser.Navigate(url);
                        await tcs.Task; // wait for DocumentCompleted
                    }
                    finally
                    {
                        // always unsubscribe so the handler does not outlive this call
                        webBrowser.DocumentCompleted -= handler;
                    }
                }

                // get the root element
                var documentElement = webBrowser.Document.GetElementsByTagName("html")[0];

                // poll the current HTML for changes asynchronously
                var html = documentElement.OuterHtml;
                while (true)
                {
                    // wait asynchronously, this will throw if cancellation requested
                    await Task.Delay(POLL_DELAY, token);

                    // continue polling if the WebBrowser is still busy
                    if (webBrowser.IsBusy)
                        continue;

                    var htmlNow = documentElement.OuterHtml;
                    if (html == htmlNow)
                        break; // no changes detected, end the poll loop

                    html = htmlNow;
                }

                // consider the page fully rendered
                token.ThrowIfCancellationRequested();
                return html;
            }

            /// <summary>
            /// Writes a FEATURE_BROWSER_EMULATION value of 10000 for the current executable
            /// under HKEY_CURRENT_USER, to enable HTML5 (assuming we're running IE10+).
            /// Does nothing when not in runtime usage mode.
            /// More info: https://stackoverflow.com/a/18333982/1768303
            /// </summary>
            public static void SetFeatureBrowserEmulation()
            {
                if (System.ComponentModel.LicenseManager.UsageMode != System.ComponentModel.LicenseUsageMode.Runtime)
                    return;

                // the value name under the feature key is the executable's file name
                var appName = System.IO.Path.GetFileName(System.Diagnostics.Process.GetCurrentProcess().MainModule.FileName);
                Registry.SetValue(
                    @"HKEY_CURRENT_USER\Software\Microsoft\Internet Explorer\Main\FeatureControl\FEATURE_BROWSER_EMULATION",
                    appName, 10000, RegistryValueKind.DWord);
            }
        }

        /// <summary>
        /// MessageLoopApartment
        /// STA thread with message pump for serial execution of tasks
        /// by Noseratio - https://stackoverflow.com/a/22262976/1768303
        /// </summary>
        public class MessageLoopApartment : IDisposable
        {
            Thread _thread; // the STA thread

            TaskScheduler _taskScheduler; // the STA thread's task scheduler

            /// <summary>The task scheduler that queues work to the STA thread.</summary>
            public TaskScheduler TaskScheduler { get { return _taskScheduler; } }

            /// <summary>
            /// MessageLoopApartment constructor: starts the STA thread and blocks until
            /// its message loop is running and its task scheduler has been captured.
            /// </summary>
            public MessageLoopApartment()
            {
                var tcs = new TaskCompletionSource<TaskScheduler>();

                // start an STA thread and gets a task scheduler
                _thread = new Thread(startArg =>
                {
                    EventHandler idleHandler = null;

                    idleHandler = (s, e) =>
                    {
                        // handle Application.Idle just once
                        Application.Idle -= idleHandler;
                        // return the task scheduler
                        tcs.SetResult(TaskScheduler.FromCurrentSynchronizationContext());
                    };

                    // handle Application.Idle just once
                    // to make sure we're inside the message loop
                    // and SynchronizationContext has been correctly installed
                    Application.Idle += idleHandler;
                    Application.Run();
                });

                _thread.SetApartmentState(ApartmentState.STA);
                _thread.IsBackground = true;
                _thread.Start();

                // block the constructing thread until the scheduler is available
                _taskScheduler = tcs.Task.Result;
            }

            /// <summary>
            /// Shuts down the STA thread and waits for it to exit.
            /// Calling it again after the first call is a no-op.
            /// </summary>
            public void Dispose()
            {
                if (_taskScheduler != null)
                {
                    var taskScheduler = _taskScheduler;
                    // clear the field first so a repeated Dispose does nothing
                    _taskScheduler = null;

                    // execute Application.ExitThread() on the STA thread
                    Task.Factory.StartNew(
                        () => Application.ExitThread(),
                        CancellationToken.None,
                        TaskCreationOptions.None,
                        taskScheduler).Wait();

                    _thread.Join();
                    _thread = null;
                }
            }

            // Task.Factory.StartNew wrappers

            /// <summary>Runs <paramref name="action"/> on the STA thread and blocks until it completes.</summary>
            public void Invoke(Action action)
            {
                Task.Factory.StartNew(action,
                    CancellationToken.None, TaskCreationOptions.None, _taskScheduler).Wait();
            }

            /// <summary>Runs <paramref name="action"/> on the STA thread and blocks for its result.</summary>
            public TResult Invoke<TResult>(Func<TResult> action)
            {
                return Task.Factory.StartNew(action,
                    CancellationToken.None, TaskCreationOptions.None, _taskScheduler).Result;
            }

            /// <summary>Schedules <paramref name="action"/> on the STA thread without blocking.</summary>
            public Task Run(Action action, CancellationToken token)
            {
                return Task.Factory.StartNew(action, token, TaskCreationOptions.None, _taskScheduler);
            }

            /// <summary>Schedules <paramref name="action"/> on the STA thread and returns a task for its result.</summary>
            public Task<TResult> Run<TResult>(Func<TResult> action, CancellationToken token)
            {
                return Task.Factory.StartNew(action, token, TaskCreationOptions.None, _taskScheduler);
            }

            /// <summary>Starts an asynchronous <paramref name="action"/> on the STA thread and unwraps its task.</summary>
            public Task Run(Func<Task> action, CancellationToken token)
            {
                return Task.Factory.StartNew(action, token, TaskCreationOptions.None, _taskScheduler).Unwrap();
            }

            /// <summary>Starts an asynchronous <paramref name="action"/> on the STA thread and unwraps its result task.</summary>
            public Task<TResult> Run<TResult>(Func<Task<TResult>> action, CancellationToken token)
            {
                return Task.Factory.StartNew(action, token, TaskCreationOptions.None, _taskScheduler).Unwrap();
            }
        }
    }



欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/zaji/5505842.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-12-13
下一篇 2022-12-12

发表评论

登录后才能评论

评论列表(0条)

保存