发现长图案_随笔

发现长图案

更新资料

我对算法进行了改进：改进后的平均时间复杂度为 O(M + N^2)，内存需求为 O(M + N)。其思路与下文“旧答案”中描述的方法基本相同，但为了计算每个差值 D 的可能因子对 (A, K)，我预先计算了一张表。对于 M = 10^7，构建此表花费不到一秒钟的时间。

我做了一个不到10分钟即可解决N = 10 ^ 5个不同的随机整数元素的C实现。

这是 C 语言的源代码，使用以下命令编译：gcc -O3 -o findgeo findgeo.c -lm（代码使用了 math.h，在多数 Linux 工具链上需要 -lm 来链接数学库）

/*
 * findgeo.c - find the longest subsequence of a sorted integer array whose
 * consecutive differences form a geometric progression a*k, a*k^2, a*k^3, ...
 * with integer a >= 1 and integer ratio k >= 1 (k == 1 is an arithmetic
 * progression).
 *
 * Build: gcc -O3 -o findgeo findgeo.c -lm
 */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <memory.h>
#include <time.h>

/*
 * One decomposition D = a*k + a*k^2 = a*k*(k+1) of an even difference D.
 * factors[D/2] is the head of a singly linked list holding every such
 * (a, k) pair; the head lives in the table itself, the rest on the heap.
 */
struct Factor {
    int a;
    int k;
    struct Factor *next;
};

struct Factor *factors = NULL; /* table indexed by D/2, or NULL if not built */
int factorsL = 0;              /* number of entries in factors */

/* malloc wrapper: the program has no recovery path, so abort on exhaustion. */
static void *xmalloc(size_t size)
{
    void *p = malloc(size ? size : 1);

    if (!p) {
        fprintf(stderr, "findgeo: out of memory\n");
        exit(EXIT_FAILURE);
    }
    return p;
}

/* Releases the factor table and every chained node; safe if nothing is built. */
void DestructFactors(void)
{
    int i;

    for (i = 0; i < factorsL; i++) {
        struct Factor *node = factors[i].next;

        while (node) {
            struct Factor *following = node->next;

            free(node);
            node = following;
        }
    }
    free(factors);
    factors = NULL;
    factorsL = 0;
}

/*
 * Builds the global factor table for every even difference D in [0, R].
 * Any previously built table is released first (including its chained nodes).
 * Prints the construction time to stdout.
 */
void ConstructFactors(int R)
{
    int a, k;
    int half;
    int kLimit;
    float seconds;
    clock_t end;
    clock_t start = clock();

    DestructFactors();

    if (R < 0)
        R = 0;
    half = R >> 1;

    /* Every even D has the trivial decomposition k = 1, a = D/2. */
    factors = xmalloc(sizeof *factors * ((size_t)half + 1));
    for (a = 0; a <= half; a++) {
        factors[a].a = a;
        factors[a].k = 1;
        factors[a].next = NULL;
    }
    factorsL = half + 1;

    /* k*(k+1) <= R implies k < sqrt(R). */
    kLimit = (int)floor(sqrt((double)R));
    for (k = 2; k <= kLimit; k++) {
        long long step = (long long)k * (k + 1);
        long long C;

        /* C == R is a legal difference (first and last element), so use <=. */
        for (a = 1, C = step; C <= R; a++, C += step) {
            struct Factor *slot = &factors[C >> 1];
            struct Factor *moved = xmalloc(sizeof *moved);

            /* Push the old head down the chain; the new pair becomes head. */
            *moved = *slot;
            slot->a = a;
            slot->k = k;
            slot->next = moved;
        }
    }

    end = clock();
    seconds = (float)(end - start) / CLOCKS_PER_SEC;
    printf("Construct Table: %f\n", seconds);
}

/*
 * Integer power by repeated squaring. Returns 1 for exp <= 0.
 * The result is not checked for overflow; callers must keep it within int.
 */
int ipow(int base, int exp)
{
    int result = 1;

    while (exp > 0) {
        if (exp & 1)
            result *= base;
        exp >>= 1;
        if (exp) /* skip the final, unused squaring: it could overflow */
            base *= base;
    }
    return result;
}

/*
 * Returns a*k + a*k^2 + ... + a*k^terms, or limit + 1 as soon as the sum
 * exceeds limit. Saturating keeps the arithmetic far away from overflow.
 */
static long long geoSpan(int a, int k, int terms, long long limit)
{
    long long term;
    long long sum = 0;
    int t;

    if (k == 1) {
        sum = (long long)a * terms;
        return sum > limit ? limit + 1 : sum;
    }

    term = (long long)a * k;
    for (t = 0; t < terms; t++) {
        sum += term;
        if (sum > limit)
            return limit + 1;
        term *= k; /* term <= limit here, so the product fits easily */
    }
    return sum;
}

/*
 * Finds the longest geometric-difference subsequence of Arr.
 *
 * Arr            input values; must be sorted in strictly ascending order
 * L              number of elements in Arr
 * bestSolution   receives a malloc'ed array with the result; caller frees it
 * bestSolutionL  receives the number of elements in *bestSolution
 *
 * If no sequence of length >= 3 exists, the first min(2, L) elements are
 * returned. For L <= 0 the result is NULL with length 0.
 * Prints the elapsed time to stdout.
 */
void findGeo(int **bestSolution, int *bestSolutionL, int *Arr, int L)
{
    int i, j;
    int R;
    int kMax, aMax;
    int *candidate;
    int candidateL;
    unsigned char *H;
    float seconds;
    clock_t end;
    clock_t start = clock();

    if (L <= 0) {
        *bestSolution = NULL;
        *bestSolutionL = 0;
        return;
    }

    R = Arr[L - 1] - Arr[0];

    /* Upper bounds for a sequence longer than the current best (length 2). */
    kMax = (int)floor(sqrt((double)R));
    aMax = R / 2;

    ConstructFactors(R);

    *bestSolutionL = 2;
    *bestSolution = NULL;

    /* A sequence consists of distinct members of Arr, so L slots suffice. */
    candidate = xmalloc(sizeof *candidate * (size_t)L);

    /* H[v - Arr[0]] != 0 exactly when v occurs in Arr. */
    H = calloc((size_t)R + 1, sizeof *H);
    if (!H) {
        fprintf(stderr, "findgeo: out of memory\n");
        exit(EXIT_FAILURE);
    }
    for (i = 0; i < L; i++)
        H[Arr[i] - Arr[0]] = 1;

    /* Treat Arr[i] and Arr[j] as the 1st and 3rd element of a sequence. */
    for (i = 0; i < L - 2; i++) {
        for (j = i + 2; j < L; j++) {
            int D = Arr[j] - Arr[i];
            long long base = (long long)Arr[i] - Arr[0];
            struct Factor *f;

            /* D = a*k*(k+1) is always even; D <= 0 means unsorted input. */
            if (D <= 0 || (D & 1))
                continue;

            for (f = &factors[D >> 1]; f; f = f->next) {
                long long off;
                long long term;

                if (f->k > kMax || f->a > aMax)
                    continue;

                /* The 2nd element Arr[i] + a*k must be present. */
                off = base + (long long)f->a * f->k;
                if (off > R || !H[off])
                    continue;

                /* The element at index *bestSolutionL must be present,
                 * otherwise this sequence cannot beat the current best. */
                off = base + geoSpan(f->a, f->k, *bestSolutionL, R);
                if (off > R || !H[off])
                    continue;

                candidate[0] = Arr[i];
                candidate[1] = Arr[i] + f->a * f->k;
                candidate[2] = Arr[j];
                candidateL = 3;

                /* Extend with differences a*k^3, a*k^4, ... while present. */
                term = (long long)f->a * f->k * f->k * f->k;
                off = (long long)Arr[j] - Arr[0] + term;
                while (off <= R && H[off]) {
                    candidate[candidateL++] = (int)(off + Arr[0]);
                    term *= f->k;
                    off += term;
                }

                if (candidateL > *bestSolutionL) {
                    free(*bestSolution);
                    *bestSolution = candidate;
                    *bestSolutionL = candidateL;
                    candidate = xmalloc(sizeof *candidate * (size_t)L);

                    /* A longer sequence needs a*k^best <= R and a*best <= R.
                     * 1.0 forces floating division; +1 keeps the bound
                     * conservative against pow() rounding just below an
                     * exact integer root. */
                    kMax = (int)floor(pow((double)R, 1.0 / *bestSolutionL)) + 1;
                    aMax = R / *bestSolutionL;
                }
            }
        }
    }

    if (*bestSolutionL == 2) {
        /* Nothing of length >= 3 was found: fall back to the first elements. */
        int n = L < 2 ? L : 2;

        free(*bestSolution);
        for (i = 0; i < n; i++)
            candidate[i] = Arr[i];
        *bestSolution = candidate;
        *bestSolutionL = n;
    } else {
        free(candidate);
    }

    DestructFactors();
    free(H);

    end = clock();
    seconds = (float)(end - start) / CLOCKS_PER_SEC;
    printf("findGeo: %f\n", seconds);
}

/* qsort comparator for int, written without subtraction to avoid overflow. */
int compareInt(const void *a, const void *b)
{
    int x = *(const int *)a;
    int y = *(const int *)b;

    return (x > y) - (x < y);
}

/*
 * Draws N distinct values from 1..R without replacement, sorts them, runs
 * findGeo and prints the sequence found together with its size.
 */
int main(void)
{
    int N = 100000;
    int R = 10000000;
    int *A = xmalloc(sizeof *A * (size_t)N);
    int *S = xmalloc(sizeof *S * (size_t)R);
    int *Sol;
    int SolL;
    int i;

    for (i = 0; i < R; i++)
        S[i] = i + 1;

    /* Partial Fisher-Yates: pick a remaining slot, then fill the hole with
     * the last remaining value so no number is drawn twice. */
    for (i = 0; i < N; i++) {
        int r = rand() % (R - i);

        A[i] = S[r];
        S[r] = S[R - i - 1];
    }
    free(S);

    qsort(A, N, sizeof(int), compareInt);

    findGeo(&Sol, &SolL, A, N);

    printf("[");
    for (i = 0; i < SolL; i++) {
        if (i > 0)
            printf(",");
        printf("%d", Sol[i]);
    }
    printf("]\n");
    printf("Size: %d\n", SolL);

    free(Sol);
    free(A);
    return EXIT_SUCCESS;
}

我将尝试证明：对于均匀分布的随机序列，
我提出的算法平均复杂度为 O(N^2 + M)。
我不是数学家，也不习惯做这种证明，
因此如果您发现任何错误，
请随时纠正。

共有 4 层嵌套循环，最外面的两层贡献了 N^2 因子，
而 M 来自计算可能因子表的开销。

第三层循环对每一对元素平均只执行一次。
检查预先计算的因子表的大小即可看出这一点：
当 N→∞ 时，表的大小为 M，因此每对元素的平均步数为 M/M = 1。

因此，证明归结为检查第四层循环（即遍历已成形序列的循环）：
需要说明它在所有元素对上的总执行次数不超过 O(N^2)。

为了证明这一点，我将考虑两种情况：一种是M >> N，另一种是
M〜=N。其中M是初始数组的最大差：M = S（n）-S（1）。

对于第一种情况（M >> N），找到一次重合的概率为 p = N/M。要
开始一个序列，必须同时与第二个元素以及第 b+1 个元素重合，其中 b 是
到目前为止最佳序列的长度。因此进入该循环的次数为
N^2·(N/M)^2。
该序列的平均长度（假设为无穷级数）为 p/(1−p) = N/(M−N)。因此，
循环执行的总次数为 N^2·(N/M)^2·N/(M−N)。当
M >> N 时，它趋近于 0。问题出在 M ≈ N 的情况。

现在让我们考虑 M ≈ N 的情况。设 b 为
到目前为止的最佳序列长度。对于 A = k = 1 的情况，序列必须
在 N−b 之前开始，因此序列数为 N−b，
循环所需的次数最多为 (N−b)·b。

对于 A > 1 且 k = 1 的情况，可以推广为
(N − A·b/d)·b，其中 d = M/N（
数字之间的平均距离）。如果对所有 A（从 1 到 dN/b）求和，那么
得到的上限是：

$$\sum_{A=1}^{dN/b}\left(N-\frac{Ab}{d}\right)b=\frac{N^2d}{2}$$

对于 k >= 2 的情况，序列必须在
N − A·k^b/d 之前开始，因此进入循环的
平均次数为 (N − A·k^b/d)·b；
对所有 A（从 1 到 dN/k^b）求和，得到的上限为

$$\sum_{A=1}^{dN/k^b}\left(N-\frac{Ak^b}{d}\right)b=\frac{bN^2d}{2k^b}$$

在此，最坏的情况是当b最小时。因为我们正在考虑最小
序列，所以让我们考虑b = 2的最坏情况，因此
对于给定的k，第4个循环的通过次数将小于

$$\frac{dN^2}{k^2}$$

如果我们将所有k从2加到无限，将是：

$$\sum_{k=2}^{\infty}\frac{dN^2}{k^2}=dN^2\left(\frac{\pi^2}{6}-1\right)$$

因此，将k = 1和k> = 2的所有遍加在一起，我们得到的最大值为：

$$\frac{N^2d}{2}+N^2d\left(\frac{\pi^2}{6}-1\right)=N^2d\left(\frac{\pi^2}{6}-\frac{1}{2}\right)\simeq 1.145\,N^2d$$

注意，d ＝ M / N ＝ 1 / p。

因此，我们有两个限制，一个是当d = 1 / p = M / N变为1
时变为无限，另一个是当d变为无限时变为无限。因此，我们的限制是
两者中的最小值，最坏的情况是两种情况都交叉时。因此，如果我们
求解方程：

$$N^2d\left(\frac{\pi^2}{6}-\frac{1}{2}\right)=N^2\left(\frac{N}{M}\right)^2\frac{N}{M-N}=N^2\left(\frac{1}{d}\right)^2\frac{1}{d-1}$$

我们看到最大值是在d = 1.353时

因此证明了第四循环总共将被处理少于1.55N ^ 2 次。

当然，这是一般情况。在最坏的情况下，我无法找到一种方法来生成其第四级循环高于O（N ^ 2）的级数，并且我坚信它们不存在，但是我不是数学家来证明这一点。

旧答案

这是O（（n ^ 2）* cube_root（M））的平均值的解决方案，其中M是数组的第一个元素与最后一个元素之间的差。并且内存需求为O（M + N）。

1.-构造一个长度为 M 的数组 H，使得当 i 存在于初始数组中时
H[i − S[0]] = true，否则为 false。

2.-对于数组中的每一对元素 S[j]、S[i]，执行以下操作：

2.1 检查它们是否可能是某个解的第一个和第三个元素。为此，
计算满足方程 S(i) = S(j) + A·K + A·K^2 的所有可能的 (A, K) 对（如何求解可参考相关的 SO
问题），并检查第二个元素 S[j] + A·K 是否存在。

2.2 还要检查是否存在比当前最佳解更靠后一位的元素。例如，如果到目前为止的最佳解有 4 个元素，那么检查元素 S[j] + A·K + A·K^2 + A·K^3 + A·K^4 是否存在。

2.3 如果 2.1 和 2.2 都成立，则继续迭代以确定该序列的长度；如果它比目前的最佳解更长，就将其设为 bestSolution。

这是javascript中的代码：

/**
 * Returns the pairs [B, C] with B * C * (C + 1) === A that are found by
 * trying every i up to the cube root of A once as C and once as B.
 * A must be even for any pair to exist; odd input yields [].
 * The same pair may appear twice when both attempts hit it.
 */
function getAKs(A) {
    if (A / 2 != Math.floor(A / 2)) return [];
    var solution = [];
    var i;
    var SR3 = Math.pow(A, 1 / 3);
    for (i = 1; i <= SR3; i++) {
        var B, C;
        // Try i as the ratio C and solve for B.
        C = i;
        B = A / (C * (C + 1));
        if (B == Math.floor(B)) {
            solution.push([B, C]);
        }
        // Try i as the factor B and solve C^2 + C - A/B = 0 for C.
        B = i;
        C = (-1 + Math.sqrt(1 + 4 * A / B)) / 2;
        if (C == Math.floor(C)) {
            solution.push([B, C]);
        }
    }
    return solution;
}

/**
 * Finds the longest subsequence of S whose consecutive differences are
 * A*K, A*K^2, A*K^3, ... for integers A and K.
 *
 * @param {number[]} S integers, expected in strictly ascending order
 * @returns {number[]} the longest sequence found, or [] when no sequence
 *                     of at least three elements exists
 */
function getBestGeometricSequence(S) {
    var i, j, k;
    var exp;
    var bestSolution = [];
    if (S.length === 0) return bestSolution;

    // H[v - S[0]] is true exactly when v occurs in S.
    var H = Array(S[S.length - 1] - S[0]);
    for (i = 0; i < S.length; i++) H[S[i] - S[0]] = true;

    // Treat S[j] and S[i] as the first and third element of a sequence.
    for (i = 0; i < S.length; i++) {
        for (j = 0; j < i; j++) {
            var PossibleAKs = getAKs(S[i] - S[j]);
            for (k = 0; k < PossibleAKs.length; k++) {
                var A = PossibleAKs[k][0];
                var K = PossibleAKs[k][1]; // each pair is [A, K]

                // Element that has to exist for this sequence to become
                // longer than the best one found so far.
                var mustExistToBeBetter;
                if (K == 1) {
                    mustExistToBeBetter = S[j] + A * bestSolution.length;
                } else {
                    mustExistToBeBetter = S[j] + A * K * (Math.pow(K, bestSolution.length) - 1) / (K - 1);
                }

                if ((H[S[j] + A * K - S[0]]) && (H[mustExistToBeBetter - S[0]])) {
                    var possibleSolution = [S[j], S[j] + A * K, S[i]];
                    // Extend with differences A*K^3, A*K^4, ... while present.
                    exp = K * K * K;
                    var NextVal = S[i] + A * exp;
                    while (H[NextVal - S[0]] === true) {
                        possibleSolution.push(NextVal);
                        exp = exp * K;
                        NextVal = NextVal + A * exp;
                    }
                    if (possibleSolution.length > bestSolution.length) {
                        bestSolution = possibleSolution;
                    }
                }
            }
        }
    }
    return bestSolution;
}

// Alternative small input: [ 1, 2, 3,5,7, 15, 27, 30,31, 81]
var A = [];
var i;
for (i = 1; i <= 3000; i++) {
    A.push(i);
}
var sol = getBestGeometricSequence(A);
// Requires jQuery and an element with id "result" on the page.
$("#result").html(JSON.stringify(sol));

您可以在此处查看并运行代码：http://jsfiddle.net/6yHyR/1/

我之所以维持另一种解决方案，是因为我相信当M比N大时，它仍然更好。

欢迎分享，转载请注明来源：内存溢出

原文地址: http://outofmemory.cn/zaji/5013552.html

发现长图案

发表评论

评论列表（0条）