From efdd3ec965d098d766a2d63dbe8a6745f0b0a87b Mon Sep 17 00:00:00 2001 From: Zovjsra <4703michael@gmail.com> Date: Sun, 3 Jan 2021 23:47:49 +0800 Subject: [PATCH] =?UTF-8?q?=E5=93=88=E5=93=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- __pycache__/dataHandlerPTT.cpython-37.pyc | Bin 8867 -> 8938 bytes app.py | 2 +- dataHandlerPTT.py | 6 +- resource/stopWords.txt | 7 +- static/css/main.css | 32 +++++- static/js/ptt.js | 94 ++++++++++++++---- static/js/pttPush.js | 31 +++++- .../sententree/dist/sententree-standalone.js | 2 +- templates/generalTxt.html | 3 +- templates/ptt.html | 25 +++-- templates/ptt_push.html | 6 ++ 11 files changed, 171 insertions(+), 37 deletions(-) diff --git a/__pycache__/dataHandlerPTT.cpython-37.pyc b/__pycache__/dataHandlerPTT.cpython-37.pyc index d44fc1294e4bd6fb9b720c1a654bd439678e55a3..fcedf9766d18b9867e032c28e003122ce93bc630 100644 GIT binary patch delta 2571 zcmb7_+jCPz7{Je-+)fhOrfr%gY11}sgCz83t6ZE~gqA7;LItD{2kU8eTLQVU=Trg1 zR4sKJ9Ry~*aw$Q^@x`~!sH1QG1^VDP?hMZOAR|0D^5A%R(eK-ElNT@#zukQM?YHOK z@3NOSU*BA_siwxK!SCXmKTq8_QxhR?oO$Svd0Lxpr*-KL7NG%JPaA0CNyn+4bo@v+ zi&r`?X=B>{aJq|iRk|-}Fvcng)^&*x?KC+Yn$#vWN}kbnYI`*rq)jKZ^hz3{%`iSh z!!!cpDjKCNFeX`wt){K#bsD2>C!FaX+DAEY&f4J0o#4Zb(-){*;#RxeHD+GawVch z*|`9pT6Tg(-*!%5+cgA_h$YviwGo`g07-|``vxS*N>WG?>B$reHY?a_jkkfq-@f5o z*0M9jd}-uy(d>R)PkP03?xA>{1$wNIVN*F)n6(c`XQo&>DqSU>E!asvPlzRVXMGe* zq`jP3b2)pWXz?!btvei7Clj8)ufRiO?agy1crn8)>k;AeY$fZL_jn%F8_>kKbk5|= zS>*@s78ksOq*i?6O*z03Q6Fj+LEnav$I#p+#3*76F^o{h>AVx?UAF{xT;wZ3_OdwR zGgGMfKROED=v%_-`o1=^3 zrC6^pib)4omZsD~sc3<#J8*jkB92g3W5O(hDUClJ(%`dL`-8{tiWmv* zO{l~m)pWVXNnSAs%dU#| znnwTUp6kL0t?#+{slD(W$@?7gtH_6>_*u@eBfO z@|}o2#4f~cL_cCsTx?z!T@O>~m}RzfGxt_YAXhXgG$ebp z+VCttq>|-^U7Egv_y}JT7orhzZ29x(D&qOGY`|mio4DGN^n6`j0rM|KsP+83-Edmx zKDw&xrSXW?tQB;coORrMBhr)jQlus|yRj0yq#X~~O$S5e08P==C-vj?<@!o<*^Rp8 zKqU<84R+*UwCtfhw3qgsbR2KAb=pq{>=xUxpq2Howm|5^3r^5)C^y=zbnv{(0o{E_ z^u&fY+%B+ci+mpM3rmY*8L6tdRaeiV0E>sefKa7niceyF4_-X|;-A<=N~PA#v>ZiK z|BVdKfNTJ6l~ZxFEwKSBh%>|tVj}<&`F7K&miL`%I)F-9#6iTgxY9P-a#uE^umH&Z z;sAW%7i-!l=3~-N*^CKq&6o+vwuDN92I^U0&N4$b~74aHC*3MaM zsx)UEm{LuPFW|}v#G?4Uy_0Ma;g0TU)viyW^ld~J0=ooeI5gmDKBG_|-a)*JcpdQ` z;(f$_uT=V=0LQ8hHi)GTx_3}FNG+tRK>{SK>qIB~BiNT5;2{{G6c2D_(rjTi?K+rc z(`L1}7t!<_DtM+!+03CF<7ZKH1cClYk&Zt>q&OFEoxg}e)g3;>*#*Qo#CZf3K3BIk zfU^n!ACerC@aUT~mn}f0<0C9x;C8lDZ853~scXikkncf^BN`Fv>Oa641Tdv3kM6n& wbtl+?r7@f@(zzTPfh^Ok?P5>onikzjoO%E*#UN3K;WZLQkI@u}8xw}-U*jh?j{pDw delta 2525 zcmb7_TX0iF7=X{_K)T zOwrxmYyS@kt+;sWZZZzWJ^*I*vzQoN*k=I66K#gD?L7rXuh{%}l=i&;EP) zcmK=Y?fa0WgbsW8Z(GwzPFRuP;S_{!KT28%m<*aRL5t24B3&RalnqHk5+(bkL1|c`K3Z{3N;XnI4L}>Cm9z@lCuop{ zpk1vsX>l5!v(Qzv`kXbHpf$7>`kQHl)TRI4-=3mL( z6>%_FSI}%ZNqodlSnWckbr-U5}pL8-!pGq48Ii1zRw@D_Gj! zH{G=@PogMx=mp7i=teG|*XX1BB|tf}@U8nT5)dm0ckIKt&mbb)@N91p8jMrsMyyAO ztF({hrm}|q;G+Czh~gRkws;t6^OA-poF4(~JP2y*cq&Ux z8^Z=1_Z%uad8+)W@ckPoxx|Sh2G#6@c9nlo-bW5E$lh6bT3C!OIjzJ-T4R~k7(H!| zjl?oB%J$%5dr@bjs-dJ-NBLIZupJ|g~v>%k;-TX43!xy%nQLp5aZWW##3oS zV;VK>!bm6%{L5f4lyv4xDX;0K4FXO($=1Pm*2Vt_cCC8ETP_ijqP*PZ+d}8b75;VT z<7%`Wf-9ZVHK=4{{Gt4~7!EW$A%Ju{jFK!Xh>NfrAdTU*;m;3$5B#xc==p_j!cD~S zdqIKX|LdjdxZ_q~1&BBKn(Db}+Ej4TXh^D(Cgi1uAUTB3L1IWUe8q}+>5SL#PX-EJ z8mEa_%bAshmBq?}9mf{D#VXMIjNoLb;GoU4g|^PhXDSQ}ZKLf**pO$Wf+Z-;5V~f@ z3gdl+3S$-Rn3JK9clI-=2mDaY`mOhKtW*n+fYl97TxiI(Et5 zw*2&IVJ?s@W3Vm&G!)%P-sEuvK9AXrqA+`Y^jPI9?QiA`NPsedk z3NeXLIf?A3d?0MGGYd%Fk@?Zcz;w0grrNj)FHElr0fnw)9SHR2V?tX5^e!BS@yre* zrV(cmFC$(-%plGoUPZiycpdSE#JlT!WH;Ym*ElNb@_Foj8&QwI5|OlGIa>;2?7W2F zh<6ZTxyvZMi+K1pO&26tZ|Q(u{(T+oY_ce%45C&cUJ|reh=s5ZU|zh91)za+JI*wf zrm_=B+hj@`RoNiVM61fL;o2t=c$y^V(QVU7HzeBxyumsQV-8e`THr+KeS|0jS5cZn zTt~czzzk=2b@+flje%EQrRh{w)5Sw;+Tg^ge5rznA|q}%fwS;kVts%u%!i`5_A4j} z=iy-HPB}&n=?5&mDai;wj_NIzgFK>C41p66cUydrxH_Ilz=y?3@DC&0I{_e A;{X5v diff --git a/app.py b/app.py index af48a53..0502f74 100755 --- a/app.py +++ b/app.py @@ -67,7 +67,7 @@ def ptt_push(): @app.route('/ptt_push/init', methods=['POST']) def pttPushInit(): - author = 'yuetsu' + author = 'a58461351' pushes = pttPush.findAuthorPush( author=[author], stopwords=pttPush.defaultStopWords) result: dict = { diff --git a/dataHandlerPTT.py b/dataHandlerPTT.py index 3c0a5d4..0b62c86 100755 --- a/dataHandlerPTT.py +++ b/dataHandlerPTT.py @@ -31,6 +31,8 @@ with open('/home/vis/pttDatabase/PTTData/Gossiping/content/content.pck', 'rb') a defaultStopWords = [] data = PTTData('Gossiping', '/home/vis/pttDatabase/PTTData') +sentence_length = 100 +use_push_count = False with open('resource/stopWords.txt', 'r', encoding='UTF-8') as file: for word in file.readlines(): @@ -75,7 +77,7 @@ def contentProcess(content, text): result = [] for i in cutted: result.append(i) - if (len(result) >= 50): + if (len(result) >= sentence_length): sentenses.append(result.copy()) result = [] if (result != []): @@ -324,7 +326,7 @@ def getDefault(startDate, endDate): 'date': datetime.strptime(i['date'], '%Y%m%d%H%M%S').strftime("%a %b %d %H:%M:%S %Y"), 'part': i['content'][max(0, cut[0] - 20): min(len(i['content']), cut[1])].replace('\n', '') } - result.append([len(result), seq, 1000 + i['pushes']]) + result.append([len(result), seq, (1000 + i['pushes'])if use_push_count else 3000]) fileString = io.StringIO() writer = csv.writer(fileString, delimiter='\t') diff --git a/resource/stopWords.txt b/resource/stopWords.txt index 9aef6c5..4b61f5a 100755 --- a/resource/stopWords.txt +++ b/resource/stopWords.txt @@ -67,4 +67,9 @@ 那 只 所以 -講 \ No newline at end of file +講 +記者 +看 +者 +媒體 +和 \ No newline at end of file diff --git a/static/css/main.css b/static/css/main.css index 3480344..b38b365 100755 --- a/static/css/main.css +++ b/static/css/main.css @@ -112,6 +112,24 @@ html { transition-duration: 0.5s; } +#pttPageWindowContent { + display: block; + position: fixed; + background-color: #FFF; + left: 50%; + top: 50%; + transform: translate(-50%, -50%); + width: 90%; + height: 90%; + border: lightgray; + border-width: 1px; + border-style: solid; + border-radius: 20px; + padding: 26px; + z-index: 99; + box-shadow: 0px 5px 20px rgba(0, 0, 0, .3); +} + .deleteListElement { position: absolute; right: 0; @@ -141,6 +159,18 @@ li a { } .info { + background-color: rgba(255, 255, 255, 0.6); + animation: fadeIn 0.2s; + animation-fill-mode: forwards; + position: fixed; + display: flex; + top: 0; + width: 100%; + height: 100%; + z-index: 90; +} + +.info99 { background-color: rgba(255, 255, 255, 0.6); animation: fadeIn 0.2s; animation-fill-mode: forwards; @@ -189,7 +219,7 @@ li a { height: auto; border-radius: 15px; padding: 10px 15px; - z-index: 99; + z-index: 90; align-content: center; } diff --git a/static/js/ptt.js b/static/js/ptt.js index 3379fad..235a60d 100755 --- a/static/js/ptt.js +++ b/static/js/ptt.js @@ -76,7 +76,41 @@ function init() { hideIdfEditor() } }) + $('#pttPageWindow').click(function(e) { + if ($('#pttPageWindow').is(e.target)) { + hidePTTPage() + } + }) changeMode(0) + destroyCurrentGraph() + buildSentetree() +} + +function loadTemplate(num) { + templates = [{ + startDate: '2020-12-01', + endDate: '2020-12-31', + keyword: '', + mode: 1 + }, + { + startDate: '2020-01-01', + endDate: '2020-03-01', + keyword: '衛生紙', + mode: 2 + }, + { + startDate: '2020-01-11', + endDate: '2020-01-12', + keyword: '', + mode: 2 + } + ] + chosenTemp = templates[num] + setDate(chosenTemp.startDate, chosenTemp.endDate) + $('#keywordBox').val(chosenTemp.keyword) + changeMode(chosenTemp.mode) + sendRequest() } function clearStopWord() { @@ -117,8 +151,6 @@ function changeMode(_mode) { } } mode = _mode - destroyCurrentGraph() - buildSentetree() } function scrollIdfList() { @@ -170,8 +202,12 @@ function showIdfEditor() { .append($('') .attr('style', 'position: sticky; top: 0; background: white;') .append('單詞')) - .append($('')) - .append($('').attr('class', 'w3-right-align') + .append($('') + .attr('class', 'w3-center-align') + .attr('style', 'position: sticky; top: 0; background: white;') + .append('操作')) + .append($('') + .attr('class', 'w3-right-align') .attr('style', 'position: sticky; top: 0; background: white;') .append('單詞頻率') ) @@ -259,6 +295,15 @@ function hideIdfEditor() { $('#idfEditorLayer').addClass('hidden') } +function showPTTPage(url) { + $('#pttPageWindowContent iframe').attr('src', url) + $('#pttPageWindow').removeClass('hidden') +} + +function hidePTTPage() { + $('#pttPageWindow').addClass('hidden') +} + function updateIdfTable() { let wordList = $("#ieContainer").find("tr").slice(1).map((_, a) => { return [ @@ -315,6 +360,23 @@ function closeEventListner() { } function sendRequest() { + content = getContent() + startDate = $('#startDate').val() + endDate = $('#endDate').val() + console.log(content) + $.ajax({ + type: 'POST', + url: '/addRequest', + data: content, + contentType: 'application/json', + success: function(data) { + console.log(data) + changeGraph(data.Result) + } + }) +} + +function getContent() { content = JSON.stringify({ startDate: $('#startDate').val(), endDate: $('#endDate').val(), @@ -330,19 +392,7 @@ function sendRequest() { other: $('#other').is(':checked') } }) - startDate = $('#startDate').val() - endDate = $('#endDate').val() - console.log(content) - $.ajax({ - type: 'POST', - url: '/addRequest', - data: content, - contentType: 'application/json', - success: function(data) { - console.log(data) - changeGraph(data.Result) - } - }) + return content } function changeGraph(data) { @@ -466,8 +516,9 @@ function buildSentetree() { $('#titleListContainer').empty() for (i of titleList) { $('#titleListContainer').append( - $('
  • ').attr('class', 'w3-panel').append( - $('').attr('href', i.url).attr('target', '_blank').append( + $('
  • ').attr('class', 'w3-panel') + .css('cursor', 'pointer').append( + $('

    ').attr('target', '_blank').append( $('

    ').html(i.title) ).append( $('').attr('style', 'margin: 0px 10px').html(i.author) @@ -476,7 +527,10 @@ function buildSentetree() { ).append( $('').attr('style', 'margin: 0px 10px').html('推文數:' + i.pushes) ) - ) + ).click(function() { + let indx = $(this).index() + showPTTPage((titleList[indx].url).replace('www.ptt.cc', 'www.pttweb.cc')) + }) ) } }) diff --git a/static/js/pttPush.js b/static/js/pttPush.js index 27ec55e..b5922dd 100644 --- a/static/js/pttPush.js +++ b/static/js/pttPush.js @@ -25,6 +25,7 @@ function init() { console.log(wordPushList) $('#idBox').val(data.Result.author) buildSentetree(tsvString) + changeMode(2) } }) $(document).ready(function() { @@ -66,6 +67,31 @@ function init() { }) } +function loadTemplate(num) { + templates = [{ + userId: '', + aid: '1Vv4iFY6', + keyword: '', + mode: 0, + }, { + userId: '', + aid: '1VyJ2vP_', + keyword: '', + mode: 0 + }, { + userId: 'xetherz3', + aid: '', + keyword: '', + mode: 0 + }] + let chosenTemplate = templates[num] + $('#idBox').val(chosenTemplate.userId) + $('#titleBox').val(chosenTemplate.aid) + $('#keywordBox').val(chosenTemplate.keyword) + changeMode(0) + sendRequest() +} + function clearStopWord() { stopwords = [] $('#sweContainer').html('') @@ -138,7 +164,10 @@ function showIdfEditor() { .append($('') .attr('style', 'position: sticky; top: 0; background: white;') .append('單詞')) - .append($('')) + .append($('') + .attr('class', 'w3-center-align') + .attr('style', 'position: sticky; top: 0; background: white;') + .append('操作')) .append($('').attr('class', 'w3-right-align') .attr('style', 'position: sticky; top: 0; background: white;') .append('單詞頻率') diff --git a/static/node_modules/sententree/dist/sententree-standalone.js b/static/node_modules/sententree/dist/sententree-standalone.js index 00cc1b9..33b4503 100755 --- a/static/node_modules/sententree/dist/sententree-standalone.js +++ b/static/node_modules/sententree/dist/sententree-standalone.js @@ -19158,7 +19158,7 @@ maxc = value } } else { - if (value < maxSupport && (value * Math.pow(distRatio, dist - 1)) > maxc && stopwords.indexOf(itemset[w]) < 0 && dist == 1) { + if (value < maxSupport && value > maxc && dist < 2 && stopwords.indexOf(itemset[w]) < 0 && dist == 1) { maxw = +w maxc = value console.log(maxc) diff --git a/templates/generalTxt.html b/templates/generalTxt.html index 1915636..7c65940 100644 --- a/templates/generalTxt.html +++ b/templates/generalTxt.html @@ -41,7 +41,8 @@
    - +
    diff --git a/templates/ptt.html b/templates/ptt.html index 1e31493..78a09f6 100755 --- a/templates/ptt.html +++ b/templates/ptt.html @@ -15,6 +15,11 @@
    + @@ -66,17 +71,19 @@

    {{title}}

    SentenTree https://github.com/twitter/SentenTree

    -

    同時使用關鍵詞和詞性搜尋的時候,必須選擇所設關鍵詞本身的詞性,否則會搜尋不到結果。

    -

    點選圖上的單詞可以查看單詞的資訊,觀看原始文章,也會快速切換關鍵字。

    -

    若搜尋到的文章超過50篇,圖表僅會顯示推文數最多的前50篇文章。

    -

    停用詞的處理改為將不直接忽略停用詞,但是停用詞不會被設為主要單詞,並且大小會比其他單詞更小。

    +

    可直接從下方範例搜尋條件中選擇

    +
    + + + +
    搜尋日期範圍 從 - +
    @@ -97,10 +104,10 @@
    - - - - + + + +
    diff --git a/templates/ptt_push.html b/templates/ptt_push.html index 3639d19..73de566 100644 --- a/templates/ptt_push.html +++ b/templates/ptt_push.html @@ -67,6 +67,12 @@

    SentenTree https://github.com/twitter/SentenTree

    同時使用關鍵詞和詞性搜尋的時候,必須選擇所設關鍵詞本身的詞性,否則會搜尋不到結果。

    鄉民ID和文章aid之中必須至少設定一個搜尋條件。

    +

    可直接從下方範例搜尋條件中選擇

    +
    + + + +