From 8014aef8d14a66e75ee71a99ba17dcd94e4ee5c4 Mon Sep 17 00:00:00 2001 From: Zovjsra <4703michael@gmail.com> Date: Wed, 15 Jul 2020 14:40:59 +0800 Subject: [PATCH] Add pttPush --- app.py | 46 ++++-- dataHandlerPTTPush.py | 89 +++++++++++ static/css/main.css | 11 ++ static/js/ptt.js | 6 +- static/js/pttPush.js | 329 ++++++++++++++++++++++++++++++++++++++ templates/generalTxt.html | 1 + templates/ptt.html | 1 + templates/ptt_push.html | 90 +++++++++++ 8 files changed, 557 insertions(+), 16 deletions(-) create mode 100644 dataHandlerPTTPush.py create mode 100644 static/js/pttPush.js create mode 100644 templates/ptt_push.html diff --git a/app.py b/app.py index 0471be9..2285074 100755 --- a/app.py +++ b/app.py @@ -3,6 +3,7 @@ import threading import random import string import dataHandlerPTT as ptt +import dataHandlerPTTPush as pttPush import generalText as gen from queue import Queue from datetime import datetime @@ -58,6 +59,37 @@ def index(): return redirect('/ptt') +@app.route('/ptt_push') +def ptt_push(): + return render_template('ptt_push.html', title='推文 Sententree') + + +@app.route('/ptt_push/init', methods=['POST']) +def pttPushInit(): + author = next(pttPush.getMostFrequentAuthor()) + pushes = pttPush.findAuthorPush( + author=[author], stopwords=pttPush.defaultStopWords) + result: dict = { + 'author': author, + 'stopwords': pttPush.defaultStopWords, + 'tsv': pushes[0], + 'json': pushes[1] + } + return jsonify(Result=result) + + +@app.route('/ptt_push/addRequest', methods=['POST']) +def pttPushAddRequest(): + pushes = pttPush.findAuthorPush(author=request.json['author'].split( + ' '), aid=request.json['aid'].split(' '), keyword=request.json['keyword'], stopwords=request.json['stopwords']) + result = { + 'keyword': request.json['keyword'], + 'tsv': pushes[0], + 'json': pushes[1] + } + return jsonify(Result=result) + + @app.route('/ptt') def pttSententree(): return render_template('ptt.html', title="PTT Sententree") @@ -79,18 +111,6 @@ def updateContent(): }) -@app.route('/askProgressHandler', methods=['POST']) -def askProgressHandler(): - key = request.json['key'] - randId = ''.join(random.choices( - string.ascii_uppercase + string.ascii_lowercase, k=15)) - threading.Thread(target=ptt.progressListener, - args=(key, eventQueue, randId,)).start() - return jsonify(Result={ - 'id': randId - }) - - @app.route('/addRequest', methods=['POST']) def addRequest(): content = request.json @@ -126,4 +146,4 @@ def initPage(): if __name__ == "__main__": - app.run(debug=False, port=4980, host='0.0.0.0', threaded=True) + app.run(debug=True, port=4998, host='0.0.0.0', threaded=True) diff --git a/dataHandlerPTTPush.py b/dataHandlerPTTPush.py new file mode 100644 index 0000000..faa19c2 --- /dev/null +++ b/dataHandlerPTTPush.py @@ -0,0 +1,89 @@ +from PTTData import PTTData +from pprint import pprint +from datetime import datetime +from progressbar import ProgressBar +import json +import csv +import io + +data = PTTData('Gossiping') +lastUpdate: datetime = None +mostFrequentAuthor: str = None + +defaultStopWords = [] +with open('resource/stopWords.txt', 'r', encoding='UTF-8') as file: + for word in file.readlines(): + word = word.strip() + defaultStopWords.append(word) + + +def getMostFrequentAuthor(title: str = None): + global mostFrequentAuthor, lastUpdate + if (lastUpdate == datetime.today().date()): + yield mostFrequentAuthor + elif (mostFrequentAuthor != None): + yield mostFrequentAuthor + authorList = data.pushCllc.aggregate(pipeline=[{'$group': { + '_id': '$author', + 'count': { + '$sum': 1 + } + }}]) + lastUpdate = datetime.today().date() + mostFrequentAuthor = max(authorList, key=lambda x: x['count'])['_id'] + yield mostFrequentAuthor + return + + +def possegPushes(ids: list, stopwords: list, keyword: str): + possegs = data.pushPossegCllc.find({'ID': {'$in': ids}}) + result = [] + for index, p in enumerate(possegs): + words = [i[1] for i in p['content'] if i[0] not in [ + 'eng', 'x', 'm'] and i[1] not in stopwords] + if(keyword == '' or keyword in words): + result.append({ + 'posString': ' '.join(words), + 'ID': p['ID'] + }) + return result + + +def findAuthorPush(author: list = None, aid: list = None, keyword: str = '', stopwords: list = []): + terms = {} + if (author != [''] and author != None): + terms['author'] = { + '$in': author + } + if (aid != [''] and aid != None): + terms['postAid'] = { + '$in': aid + } + print(terms) + pushes = data.pushCllc.find(terms) + pushId = [] + pushContent = {} + for p in pushes: + pushId.append(p['_id']) + pushContent[str(p['_id'])] = { + 'title': p['title'], + 'author': p['author'], + 'pushes': p['pushes'] + } + possegList = possegPushes(pushId, stopwords, keyword) + possegResult = [['id', 'text', 'count']] + for index, n in enumerate(possegList): + if(str(n['ID']) in pushContent.keys()): + pushContent[str(n['ID'])]['part'] = str(n['posString']) + possegResult.append([index, n['posString'], 3000]) + jsonString = json.dumps( + [i for i in pushContent.values()], indent=4, ensure_ascii=False) + with io.StringIO() as f: + writer = csv.writer(f, delimiter='\t') + writer.writerows(possegResult) + csvString = f.getvalue() + return (csvString, jsonString) + + +if __name__ == "__main__": + pprint(findAuthorPush(['gwenwoo'])) diff --git a/static/css/main.css b/static/css/main.css index 1d9a6c9..ebde5f4 100755 --- a/static/css/main.css +++ b/static/css/main.css @@ -309,6 +309,17 @@ input[type="date" i] { animation: blinker 1s linear infinite; } +.searchBox { + padding: 7px; + align-content: center; + border-radius: 3px; + border-style: solid; + border-width: 1px; + border-color: lightslategray; + margin-left: 5px; + margin-right: 5px; +} + @keyframes blinker { 50% { color: red; diff --git a/static/js/ptt.js b/static/js/ptt.js index fef3876..e911c58 100755 --- a/static/js/ptt.js +++ b/static/js/ptt.js @@ -308,9 +308,9 @@ function buildSentetree(tsvString) { .on('nodeMouseenter', node => { console.log(node) titles = node.data.topEntries.map(function(x) { - return wordTitleList[x.rawText] - }) - //console.log(titles) + return wordTitleList[x.rawText] + }) + console.log(titles) infoStr = '' for (index in titles) { if (index == 0) { diff --git a/static/js/pttPush.js b/static/js/pttPush.js new file mode 100644 index 0000000..fd3046c --- /dev/null +++ b/static/js/pttPush.js @@ -0,0 +1,329 @@ +init() +var tsvPath = '' +var titlePath = '' +var defaultStartDate +var defaultEndDate +var totalPosts +var startDate +var endDate +var wordPushList +var randId +var globKeyword = '' +var stopwords = [] + +function init() { + $.ajax({ + type: 'POST', + url: 'ptt_push/init', + dataType: 'json', + success: function(data) { + console.log(data) + tsvString = data.Result.tsv + wordPushList = JSON.parse(data.Result.json) + stopwords = data.Result.stopwords + console.log(wordPushList) + $('#idBox').val(data.Result.author) + buildSentetree(tsvString) + } + }) + $(document).ready(function() { + $(window).keydown(function(event) { + if (event.keyCode == 13) { + event.preventDefault() + sendRequest() + } + }); + }); + $(window).on('mousemove', function(e) { + $('#nodeTitle').css({ + left: e.pageX, + top: e.pageY + }) + }) + $('#titleListContainer').hover( + function() { // Run on hover/mouseenter + $(this).css('overflow', 'auto') + }, + function() { // Run on mouseleave + $(this).css('overflow', 'hidden') + } + ) + $('#titleListLayer').click(function(e) { + if ($('#titleListLayer').is(e.target)) { + hideTitles() + } + }) + $('#stopWordEditorLayer').click(function(e) { + if ($('#stopWordEditorLayer').is(e.target)) { + hideStopWordEditor() + } + }) +} + +function clearStopWord() { + stopwords = [] + $('#sweContainer').html('') +} + +function addStopWord() { + newswRaw = $('#newStopWord').val() + newswList = newswRaw.split(' ') + for (newsw of newswList) { + if (newsw != '') { + if (stopwords.includes(newsw)) { + + } else { + stopwords.push(newsw) + $('#sweContainer').append($('
SentenTree https://github.com/twitter/SentenTree
+同時使用關鍵詞和詞性搜尋的時候,必須選擇所設關鍵詞本身的詞性,否則會搜尋不到結果。
+點選圖上的單詞可以查看單詞的資訊,觀看原始文章,也會快速切換關鍵字。
+若搜尋到的文章超過50篇,圖表僅會顯示推文數最多的前50篇文章。
+