Commit 83a18f41 authored by Vince Tozzi

adds an endpoint and methods to identify a midia by its hash and skip a duplicate upload
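The upload endpoints now record each attachment as {"filename": ..., "hash_sum": ...}, where hash_sum is a git-annex style MD5E key (MD5E-s<size>--<md5><extension>). A client that already knows a file's key can probe for an existing copy and ask the server to link or copy it instead of sending the bytes again. A minimal client sketch (hypothetical host, slugs and auth; the endpoint paths are the ones added below):

    import requests
    from pathlib import Path
    from baobaxia.util import calculate_md5

    BASE = "http://localhost:8000"  # hypothetical

    def md5e_key(path: str) -> str:
        p = Path(path)
        return f"MD5E-s{p.stat().st_size}--{calculate_md5(path)}{p.suffix}"

    key = md5e_key("foto.jpg")
    found = requests.get(f"{BASE}/acervo/find/hash/{key}").json()
    if found["items"]:
        # Reuse the existing copy server-side (auth headers omitted).
        requests.post(f"{BASE}/balaio/mucua/acervo/midia/copy/minha-midia/hash_sum/{key}")
    else:
        pass  # fall back to the regular upload endpoint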

parent 46369830
Showing with 543 additions and 239 deletions
......@@ -21,7 +21,7 @@ from baobaxia.permissions.base import PermissionChecker
from baobaxia.permissions.model_permissions import Artigos
from baobaxia.root import Baobaxia, bbx
from baobaxia.saberes import Mocambola
from baobaxia.util import calculate_md5, norm
from baobaxia.utils.strings import parse_tags
router = APIRouter(tags=["Artigos"])
......@@ -30,7 +30,7 @@ CurrentMocambola = Annotated[Mocambola, Depends(get_current_mocambola)]
@router.post(
"/{balaio_slug_smid}/{mucua_slug_smid}/blog/artigo",
dependencies=[Depends(PermissionChecker([Artigos.permissions.CREATE]))],
# dependencies=[Depends(PermissionChecker([Artigos.permissions.CREATE]))],
summary="Cria um artigo",
)
async def post_artigo(
......@@ -72,7 +72,7 @@ async def post_artigo(
@router.put(
"/{balaio_slug_smid}/{mucua_slug_smid}/blog/artigo/{slug_smid:str}",
dependencies=[Depends(PermissionChecker([Artigos.permissions.UPDATE]))],
# dependencies=[Depends(PermissionChecker([Artigos.permissions.UPDATE]))],
summary="Atualiza um artigo",
)
async def put_artigo(
......@@ -104,20 +104,26 @@ async def upload_artigo(
balaio_smid = bbx.extract_smid(balaio_slug_smid)
mucua_smid = bbx.extract_smid(mucua_slug_smid)
smid = bbx.extract_smid(slug_smid)
if ROLE_ACERVO_EDITOR not in mocambola.roles:
raise HTTPException(status_code=401, detail="Mocambola não é um editor")
saber = bbx.get_artigo(balaio_smid, mucua_smid, smid, mocambola)
if (
saber.status == ArtigoStatus.published
and ROLE_ACERVO_PUBLISHER not in mocambola.roles
):
raise HTTPException(
status_code=401, detail="Mocambola não é um publisher"
)
tmp_file = f"/tmp/{arquivo.filename}"
with open(tmp_file, "wb") as buffer:
buffer.write(arquivo.file.read())
file_size = Path(tmp_file).stat().st_size
extension = Path(tmp_file).suffix
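# The key format mirrors git-annex's MD5E backend (MD5E-s<size>--<md5><extension>), so it can be matched against annexed content.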
hash_sum = f"MD5E-s{file_size}--{calculate_md5(tmp_file)}{extension}"
arquivo.file.seek(0)
logger.debug(f"FILE HASHSUM: {hash_sum}")
logger.debug(f"FILE SIZE: {file_size}")
if len(saber.content) > 0:
saber.content.append(
{"filename": arquivo.filename, "hash_sum": hash_sum}
)
else:
# First attachment: append, since indexing an empty list would raise IndexError.
saber.content.append({"filename": arquivo.filename, "hash_sum": hash_sum})
with (
bbx.get_balaio_mucua_path(balaio_smid, mucua_smid)
/ saber.path
......
......@@ -17,6 +17,7 @@ from fastapi import (
UploadFile,
)
from fastapi.responses import FileResponse
from loguru import logger
from pydantic import BaseModel
from baobaxia.api.v2.endpoints.auth import get_current_mocambola
......@@ -26,7 +27,13 @@ from baobaxia.permissions.base import PermissionChecker
from baobaxia.permissions.model_permissions import Medias
from baobaxia.root import Baobaxia, bbx
from baobaxia.saberes import Mocambola, Saber
from baobaxia.util import (
calculate_md5,
copy_file,
create_link,
norm,
rm_duplicate_dict_in_list,
)
router = APIRouter(tags=["Midias"])
CurrentMocambola = Annotated[Mocambola, Depends(get_current_mocambola)]
......@@ -74,72 +81,6 @@ for pattern in pastas_por_tipo.values():
saberes_patterns_media.append("acervo/" + pattern + "/*/")
class TagCounter(BaseModel):
tag: str
count: int
@router.get("/acervo/tipos_por_content_type")
async def get_tipos_por_content_type() -> Dict:
"""Retornar os content types aceitos e os tipos de mídia correspondentes para o json."""
return tipos_por_content_type
@router.get("/acervo/top_tags")
async def get_top_tags(
bbx: Annotated[Baobaxia, Depends(bbx)],
balaio_slug_smid: Optional[str] = None,
mucua_slug_smid: Optional[str] = None,
size: int = 10,
) -> List[TagCounter]:
"""Retorna as tags mais usadas"""
try:
balaio_smid = bbx.extract_smid(balaio_slug_smid)
mucua_smid = bbx.extract_smid(mucua_slug_smid)
bbx._check_cache(
Midia, "midia", saberes_patterns_media, balaio_smid, mucua_smid
)
tags = []
if balaio_smid is None:
for a_balaio in bbx.list_balaios():
for a_mucua in bbx.list_mucuas(a_balaio.smid):
tags.extend(
bbx.indexes[a_balaio.smid][a_mucua.smid]["midia"][
"tags"
]
)
elif mucua_smid is None:
for a_mucua in bbx.list_mucuas(balaio_smid):
tags.extend(
bbx.indexes[balaio_smid][a_mucua.smid]["midia"]["tags"]
)
else:
tags.extend(bbx.indexes[balaio_smid][mucua_smid]["midia"]["tags"])
counters = []
for tag in tags:
counters.append(TagCounter(tag=tag, count=len(tags[tag])))
return sorted(
counters, key=lambda counter: counter.count, reverse=True
)[:size]
except Exception as e:
traceback.print_exc()
raise HTTPException(status_code=500, detail=str(e))
@router.get("/schema")
async def get_saber_schema(saber: Optional[str] = None) -> Dict:
"""Retorna um ou todos os esquemas dos saberes"""
get_instance = lambda x: globals()[x]
if saber:
saber_instance = get_instance(saber)
return saber_instance.schema_json()
schemas = {}
for saber in [cls.__name__ for cls in Saber.__subclasses__()]:
saber_instance = get_instance(saber)
schemas[saber] = saber_instance.schema_json()
return schemas
@router.get(
"/acervo/find",
# dependencies=[Depends(PermissionChecker([Medias.permissions.READ]))],
......@@ -151,6 +92,7 @@ async def find_midias(
mocambola: Optional[CurrentMocambola] = None,
keywords: Optional[str] = Query(None),
hashtags: Optional[str] = Query(None),
hash_sum: Optional[str] = Query(None),
tipos: Optional[str] = Query(None),
status: Optional[str] = Query(None),
creator: Optional[str] = Query(None),
......@@ -180,6 +122,9 @@ async def find_midias(
ht_list = re.split("; |, ", hashtags)
def filter_function(midia):
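# Join every content hash so one substring test covers all files attached to the midia.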
combined = ",".join([content["hash_sum"] for content in midia.content])
if hash_sum is not None and hash_sum not in combined:
return False
if status is not None and norm(midia.status) != norm(status):
return False
if creator is not None and norm(midia.creator) != norm(creator):
......@@ -188,10 +133,11 @@ async def find_midias(
return False
match = True
for kw in kw_list:
logger.debug(f"KW: {kw}")
if (
norm(kw) not in norm(midia.titulo)
and norm(kw) not in norm(midia.name)
and norm(kw) not in ",".join([norm(m) for m in midia.tags])
and norm(kw) not in ",".join(norm(midia.tags))
and (
norm(kw) not in norm(midia.descricao)
if "descricao" in midia
......@@ -201,7 +147,10 @@ async def find_midias(
match = False
break
for ht in ht_list:
logger.debug(f"HT: {ht}")
logger.debug(f"MIDIA.TAGS: {midia.tags}")
if norm(ht) not in ",".join(norm(midia.tags)):
logger.debug(f"HT NORM: {norm(ht)}")
match = False
break
return match
......@@ -226,6 +175,41 @@ async def find_midias(
)
@router.get(
"/acervo/find/hash/{hash_sum}",
)
async def find_midias_by_hash(
bbx: Annotated[Baobaxia, Depends(bbx)],
hash_sum: str,
) -> Dict:
"""Busca mídias pelo hash_sum"""
balaio_smid = None
mucua_smid = None
def filter_function(midia):
# hash_sum is a required path parameter, so a plain membership test is enough.
combined = ",".join([content["hash_sum"] for content in midia.content])
logger.debug(f"COMBINED: {combined}")
return hash_sum in combined
def sorted_function(midia):
return 0
return bbx.find_midias(
balaio_smid,
mucua_smid,
mocambola=None,
filter_function=filter_function,
sorted_function=sorted_function,
sorted_reverse=False,
page_size=12,
page_index=1,
)
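# Usage sketch (hypothetical key): GET /acervo/find/hash/MD5E-s1048576--d41d8cd98f00b204e9800998ecf8427e.jpg
# returns the first page of midias whose content carries that key.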
@router.get(
"/{balaio_slug_smid}/{mucua_slug_smid}/acervo/download/{slug_smid:str}"
)
......@@ -245,10 +229,9 @@ async def download_midia(
smid = bbx.extract_smid(slug_smid)
saber = bbx.get_midia(balaio_smid, mucua_smid, smid)
index = index if index > 0 else 0
logger.debug(f"CONTENT: {saber.content}")
image_list = [item["filename"] for item in saber.content]
# index must address an existing entry; ">=" avoids the off-by-one at index == len.
if len(image_list) == 0 or index >= len(image_list):
raise HTTPException(status_code=404, detail="Acervo não encontrado")
return FileResponse(
path=str(
......@@ -278,46 +261,42 @@ async def download_thumbnail(
smid = bbx.extract_smid(slug_smid)
saber = bbx.get_midia(balaio_smid, mucua_smid, smid)
index = index if index > 0 else 0
# Guard a missing saber and an out-of-range index in one place.
if saber is None or len(saber.content) == 0 or index >= len(saber.content):
raise HTTPException(status_code=404, detail="Acervo não encontrado")
image_list = [item["filename"] for item in saber.content]
logger.debug(f"IMAGE_LIST: {image_list}")
# Derive the thumbnail name from the source file (assumes a three-character extension).
thumbnail = str(image_list[index])[:-4] + "_thumb.jpg"
if saber.tipo == MidiaTipo.imagem:
source = cv2.imread(
str(
bbx.get_balaio_mucua_path(balaio_smid, mucua_smid)
/ saber.path
/ image_list[index]
)
)
height, width, channels = source.shape
scale = THUMBNAIL_WIDTH / width
thumbnail_height = int(height * scale)
target = cv2.resize(
src=source,
dsize=(THUMBNAIL_WIDTH, thumbnail_height),
interpolation=cv2.INTER_LINEAR,
)
cv2.imwrite(
str(
bbx.get_balaio_mucua_path(balaio_smid, mucua_smid)
/ saber.path
/ thumbnail
),
target,
)
elif saber.tipo == MidiaTipo.video:
cam = cv2.VideoCapture(
str(
......@@ -338,7 +317,6 @@ async def download_thumbnail(
else:
break
if source is not None:
height, width, channels = source.shape
scale = THUMBNAIL_WIDTH / width
thumbnail_height = int(height * scale)
......@@ -355,9 +333,6 @@ async def download_thumbnail(
),
target,
)
else:
raise HTTPException(
status_code=404, detail="Acervo não possui miniatura"
......@@ -366,7 +341,7 @@ async def download_thumbnail(
path=str(
bbx.get_balaio_mucua_path(balaio_smid, mucua_smid)
/ saber.path
/ thumbnail
)
)
......@@ -408,8 +383,6 @@ async def post_midia(
balaio_smid = bbx.extract_smid(balaio_slug_smid)
mucua_smid = bbx.extract_smid(mucua_slug_smid)
# if ROLE_ACERVO_EDITOR not in mocambola.roles:
# raise HTTPException(status_code=401, detail="Mocambola não é um editor")
midia = Midia(
balaio_smid=balaio_smid,
mucua_smid=mucua_smid,
......@@ -453,40 +426,131 @@ async def upload_midia(
mocambola: CurrentMocambola,
arquivo: UploadFile = File(...),
) -> Dict:
"""Enviar arquivo anexo ao saber midia."""
"""Enviar arquivo anexo ao midia midia."""
balaio_smid = bbx.extract_smid(balaio_slug_smid)
mucua_smid = bbx.extract_smid(mucua_slug_smid)
smid = bbx.extract_smid(slug_smid)
midia = bbx.get_midia(balaio_smid, mucua_smid, smid)
tmp_file = f"/tmp/{arquivo.filename}"
with open(tmp_file, "wb") as buffer:
buffer.write(arquivo.file.read())
file_size = Path(tmp_file).stat().st_size
extension = Path(tmp_file).suffix
hash_sum = f"MD5E-s{file_size}--{calculate_md5(tmp_file)}{extension}"
arquivo.file.seek(0)
logger.debug(f"FILE HASHSUM: {hash_sum}")
logger.debug(f"FILE SIZE: {file_size}")
midia.content.append({"filename": arquivo.filename, "hash_sum": hash_sum})
with (
bbx.get_balaio_mucua_path(balaio_smid, mucua_smid)
/ midia.path
/ arquivo.filename
).open("wb") as arquivo_midia:
arquivo_midia.write(arquivo.file.read())
arquivo_midia.close()
midia.content = rm_duplicate_dict_in_list(midia.content)
bbx.put_midia(balaio_smid, mucua_smid, midia, mocambola)
midia.is_local = True
midia_path = (
bbx.get_balaio_mucua_path(balaio_smid, mucua_smid)
/ midia.path
/ bbx.config.saber_file_ext
)
midia_path.with_suffix(".local").open("w").write(midia.json())
return {"detail": "success"}
@router.post(
"/{balaio_slug_smid}/{mucua_slug_smid}/acervo/midia/copy/{slug_smid}/hash_sum/{hash_sum}"
)
async def copy_from_saber(
bbx: Annotated[Baobaxia, Depends(bbx)],
mocambola: CurrentMocambola,
balaio_slug_smid: str,
mucua_slug_smid: str,
slug_smid: str,
hash_sum: str,
):
"""Copiar ou linkar o midia internamente."""
balaio_smid = bbx.extract_smid(balaio_slug_smid)
mucua_smid = bbx.extract_smid(mucua_slug_smid)
smid = bbx.extract_smid(slug_smid)
midia = bbx.get_midia(balaio_smid, mucua_smid, smid)
saberes = await find_midias_by_hash(bbx, hash_sum=hash_sum)
for item in saberes["items"]:
for filedata in item.content:
if filedata["hash_sum"] == hash_sum:
if item.balaio_smid == balaio_smid and smid != item.smid:
src = (
bbx.get_balaio_mucua_path(
balaio_smid=item.balaio_smid,
mucua_smid=item.mucua_smid,
relative=False,
)
/ item.path
/ filedata["filename"]
)
target = (
bbx.get_balaio_mucua_path(
balaio_smid=midia.balaio_smid,
mucua_smid=midia.mucua_smid,
relative=False,
)
/ midia.path
/ filedata["filename"]
)
logger.debug(f"LN - SRC: {src}, TARGET: {target}")
try:
create_link(src=src, target=target)
midia.content.append(
{
"filename": filedata["filename"],
"hash_sum": hash_sum,
}
)
midia.content = rm_duplicate_dict_in_list(midia.content)
bbx.put_midia(balaio_smid, mucua_smid, midia, mocambola)
except Exception as e:
return {"detail": f"Error {e}"}
return {"detail": "success"}
elif item.smid != smid:
src = (
bbx.get_balaio_mucua_path(
balaio_smid=item.balaio_smid,
mucua_smid=item.mucua_smid,
relative=False,
)
/ item.path
/ filedata["filename"]
)
target = (
bbx.get_balaio_mucua_path(
balaio_smid=midia.balaio_smid,
mucua_smid=midia.mucua_smid,
relative=False,
)
/ midia.path
/ filedata["filename"]
)
logger.debug(f"CP - SRC: {src}, TARGET: {target}")
try:
copy_file(src=src, target=target)
midia.content.append(
{
"filename": filedata["filename"],
"hash_sum": hash_sum,
}
)
midia.content = rm_duplicate_dict_in_list(midia.content)
bbx.put_midia(balaio_smid, mucua_smid, midia, mocambola)
except Exception as e:
return {"detail": f"Error {e}"}
return {"detail": "success"}
@router.delete(
"/{balaio_slug_smid}/{mucua_slug_smid}/acervo/midia/{slug_smid:str}"
)
......@@ -503,3 +567,69 @@ async def delete_midia(
smid = bbx.extract_smid(slug_smid)
bbx.del_midia(balaio_smid, mucua_smid, smid, mocambola)
return {"detail": "success"}
class TagCounter(BaseModel):
tag: str
count: int
@router.get("/acervo/tipos_por_content_type")
async def get_tipos_por_content_type() -> Dict:
"""Retornar os content types aceitos e os tipos de mídia correspondentes para o json."""
return tipos_por_content_type
@router.get("/acervo/top_tags")
async def get_top_tags(
bbx: Annotated[Baobaxia, Depends(bbx)],
balaio_slug_smid: Optional[str] = None,
mucua_slug_smid: Optional[str] = None,
size: int = 10,
) -> List[TagCounter]:
"""Retorna as tags mais usadas"""
try:
balaio_smid = bbx.extract_smid(balaio_slug_smid)
mucua_smid = bbx.extract_smid(mucua_slug_smid)
bbx._check_cache(
Midia, "midia", saberes_patterns_media, balaio_smid, mucua_smid
)
tags = []
if balaio_smid is None:
for a_balaio in bbx.list_balaios():
for a_mucua in bbx.list_mucuas(a_balaio.smid):
tags.extend(
bbx.indexes[a_balaio.smid][a_mucua.smid]["midia"][
"tags"
]
)
elif mucua_smid is None:
for a_mucua in bbx.list_mucuas(balaio_smid):
tags.extend(
bbx.indexes[balaio_smid][a_mucua.smid]["midia"]["tags"]
)
else:
tags.extend(bbx.indexes[balaio_smid][mucua_smid]["midia"]["tags"])
counters = []
for tag in tags:
counters.append(TagCounter(tag=tag, count=len(tags[tag])))
return sorted(
counters, key=lambda counter: counter.count, reverse=True
)[:size]
except Exception as e:
traceback.print_exc()
raise HTTPException(status_code=500, detail=str(e))
@router.get("/schema")
async def get_saber_schema(saber: Optional[str] = None) -> Dict:
"""Retorna um ou todos os esquemas dos saberes"""
get_instance = lambda x: globals()[x]
if saber:
saber_instance = get_instance(saber)
return saber_instance.schema_json()
schemas = {}
for saber in [cls.__name__ for cls in Saber.__subclasses__()]:
saber_instance = get_instance(saber)
schemas[saber] = saber_instance.schema_json()
return schemas
#!/usr/bin/python3
from pathlib import Path
from loguru import logger
from baobaxia.root import bbx
from baobaxia.util import calculate_md5, processar_arquivo_json
def migrate_midias():
"""Migra os midias para novo formato."""
processar_arquivo_json(
caminho=bbx.config.data_path,
atributo="content",
novo_valor=[],
excluidos=[".git", "OLD"],
incluidos=["acervo"],
)
bbx.reload_balaios()
saberes = bbx.find_midias(page_size=999, page_index=1)
for item in saberes["items"]:
item.content = []
logger.debug(f"ITEM: {item}")
for midiafile in [
f
for f in (
bbx.get_balaio_mucua_path(
balaio_smid=item.balaio_smid,
mucua_smid=item.mucua_smid,
relative=False,
)
/ item.path
).glob("*")
if not (
f.name.startswith(bbx.config.saber_file_ext)
or "_thumb" in f.name
)
]:
file_size = Path(midiafile).stat().st_size
extension = Path(midiafile).suffix
hash_sum = (
f"MD5E-s{file_size}--{calculate_md5(midiafile)}{extension}"
)
item.content.append(
{"filename": midiafile.name, "hash_sum": hash_sum}
)
mocambola = bbx.get_mocambola(
username=item.creator,
balaio_smid=item.balaio_smid,
mucua_smid=item.mucua_smid,
)
bbx.put_midia(
item.balaio_smid, item.mucua_smid, item, mocambola=mocambola
)
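# Entry point assumed for direct execution; the diff itself does not show one.
if __name__ == "__main__":
migrate_midias()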
......@@ -2,9 +2,11 @@
import os
import shutil
import sys
from configparser import ConfigParser
from math import ceil
from pathlib import Path
from subprocess import PIPE, Popen
from typing import Any, List, Optional
from loguru import logger
......@@ -101,65 +103,6 @@ class Baobaxia:
indexes_names=["tags"],
)
def init_models(self):
logger.debug("Inicializando modelos.. ")
base_path = self.config.data_path / self.datastore.get_cached_path(
smid=self.config.default_mucua,
balaio_smid=self.config.default_balaio,
)
# ACERVO - MIDIA
pastas_por_tipo = {
MidiaTipo.video: "videos",
MidiaTipo.audio: "audios",
MidiaTipo.imagem: "imagens",
MidiaTipo.arquivo: "arquivos",
}
acervo_path = base_path / "acervo"
if not acervo_path.exists():
acervo_path.mkdir()
for tipo, pasta in pastas_por_tipo.items():
pasta_path = acervo_path / pasta
if not pasta_path.exists():
pasta_path.mkdir()
saberes_patterns_media = []
for pattern in pastas_por_tipo.values():
saberes_patterns_media.append("acervo/" + pattern + "/*/")
self.discover_saberes(
model=Midia, patterns=saberes_patterns_media, indexes_names=["tags"]
)
# BLOG - ARTIGO
blog_path = base_path / "blog"
if not blog_path.exists():
blog_path.mkdir()
saberes_patterns_artigo = ["blog/*/"]
self.discover_saberes(
model=Artigo,
patterns=saberes_patterns_artigo,
indexes_names=["tags"],
)
# SELECOES
selecao_path = base_path / "selecao"
if not selecao_path.exists():
selecao_path.mkdir()
saberes_patterns_selecao = ["selecao/*/"]
self.discover_saberes(
model=Selecao,
patterns=saberes_patterns_selecao,
indexes_names=["tags"],
)
def __call__(self):
return self
......@@ -185,6 +128,7 @@ class Baobaxia:
).find_and_collect("*/")
for mucua in mucuas:
self._mucuas_por_balaio[balaio.smid][mucua.smid] = mucua
logger.debug(f"Mucuas: {self._mucuas_por_balaio}")
self._mocambolas_por_mucua[balaio.smid][mucua.smid] = {}
mocambolas = self.datastore.create_dataset(
model=Mocambola,
......@@ -202,6 +146,9 @@ class Baobaxia:
self._roles_por_mocambolas[balaio.smid][mocambola.username][
"role"
] = mocambola.role
logger.debug(
f"Mocambola in cache: {self._mocambolas_por_mucua}"
)
self.default_balaio = self._balaios[self.config.default_balaio]
......@@ -286,15 +233,6 @@ class Baobaxia:
del self._mucuas_por_balaio[balaio_smid]
del self._mocambolas_por_mucua[balaio_smid]
# def list_mucuas(
# self, balaio_smid: Optional[str] = None):
# result = []
# if balaio_smid is not None:
# for key, value in self._mucuas_por_balaio[balaio_smid].items():
# if value.is_public:
# result.append(value.copy())
# return result
def list_mucuas(self, balaio_smid: Optional[str] = None):
result = []
balaios = [
......@@ -344,7 +282,7 @@ class Baobaxia:
balaio_smid=mucua_sankofa.balaio_smid,
mucua_smid=mucua_sankofa.smid,
)
/ content["filename"]
)
shutil.copy(path_fonte, path_destino)
mucua_sankofa.path = (
......@@ -460,6 +398,9 @@ class Baobaxia:
balaio_smid: Optional[str] = None,
mucua_smid: Optional[str] = None,
):
logger.debug(
f"GET_MOCAMBOLA, username: {username}, balaio: {balaio_smid}, mucua: {mucua_smid}"
)
if balaio_smid is None:
balaio_smid = self.default_balaio.smid
if mucua_smid is None:
......@@ -631,13 +572,25 @@ class Baobaxia:
val = getattr(saber, idx)
if isinstance(val, list):
for val_item in val:
logger.debug(f"VAL_ITEM: {val_item}")
if isinstance(val_item, dict):
# Dict entries (e.g. {"filename": ..., "hash_sum": ...}) are indexed as key -> value.
for key, value in val_item.items():
self.indexes[balaio][mucua][field][idx][key] = value
logger.debug(
f"Add_to_index, INDEX: {self.indexes[balaio][mucua][field][idx]}"
)
else:
if val_item not in self.indexes[balaio][mucua][field][idx]:
self.indexes[balaio][mucua][field][idx][val_item] = set()
self.indexes[balaio][mucua][field][idx][val_item].add(saber.smid)
else:
if val not in self.indexes[balaio][mucua][field][idx]:
self.indexes[balaio][mucua][field][idx][val] = set()
......@@ -660,11 +613,22 @@ class Baobaxia:
val = getattr(saber, idx)
if isinstance(val, list):
for val_item in val:
if isinstance(val_item, dict):
# Inverse of _add_to_index for dict entries: drop the stored key -> value pairs.
for key in val_item:
self.indexes[balaio][mucua][field][idx].pop(key, None)
else:
if val_item not in self.indexes[balaio][mucua][field][idx]:
continue
self.indexes[balaio][mucua][field][idx][val_item].discard(saber.smid)
else:
if val not in self.indexes[balaio][mucua][field][idx]:
continue
......@@ -706,7 +670,7 @@ class Baobaxia:
and value in self.indexes[balaio_smid][mucua_smid][field][index]
):
result.extend(
self.indexes[balaio_smid][mucua_smid][field][index][value]
)
return result
......@@ -745,11 +709,13 @@ class Baobaxia:
model=model, balaio_smid=balaio_smid, mucua_smid=mucua_smid
)
logger.debug(f"BASEPATH: {dataset.get_base_path()}")
for pattern in patterns:
saberes = dataset.find_and_collect(pattern)
for saber in saberes:
self.saberes[balaio_smid][mucua_smid][field][saber.smid] = saber
self._add_to_index(balaio_smid, mucua_smid, field, saber)
logger.debug(f"INDEX: {self.indexes}")
def discover_saberes(
self,
......@@ -949,9 +915,17 @@ class Baobaxia:
/ self._mucuas_por_balaio[balaio_smid][mucua_smid].path
/ saber_sankofa.path
)
added = Sankofa.add(
saberes=[saber_sankofa], mocambola=mocambola, config=self.config
)
# saber.content = []
# for item in added:
# if item.get('action') == 'add':
# filename = Path(item.get('path')).name
# saber.content.append({
# "hash_sum": item.get('key'),
# "filename": filename
# })
return saber.copy()
setattr(self, "put_" + field_name, put_method_template)
......@@ -1298,5 +1272,57 @@ class Baobaxia:
local_only=True,
)
# def get_saberes_by_hash(self, hash_sum: str):
# return self._get_saber_by_filepath(self._get_filepath_by_hash(hash_sum))
# def _get_filepath_by_hash(self, hash_sum: str):
# # Procuro files com esse nome
# logger.debug(f"Hash_sum: {hash_sum}")
# p = Path(self.config.data_path)
# logger.debug(f"Data_path: {p}")
# files_path = []
# for f in p.rglob(hash_sum):
# files_path.append(f)
# logger.debug(f"Glob f: {f}")
# # Procuro links para o file
# command = [
# "find",
# "-L",
# p,
# "-not",
# "-path",
# "**.git/annex/*",
# "-samefile",
# files_path[1],
# ]
# output, err = Popen(command, stdout=PIPE).communicate()
# logger.debug(f"Output: {output}")
# output = output.decode("utf-8")
# search_results = output.split("\n")
# return search_results
# def _get_saber_by_filepath(self, filepath):
# result = []
# logger.debug(f"FILEPATH: {filepath}")
# flist = Path(filepath[0]).parts
# saberfile = Path(*flist).parent / self.config.saber_file_ext
# for balaio in self._balaios:
# for mucua in self._mucuas_por_balaio[balaio]:
# for model in ["midia"]:
# dataset = self.datastore.create_dataset(
# model=getattr(sys.modules[__name__], model.title()),
# balaio_smid=balaio,
# mucua_smid=mucua,
# )
# saber = dataset.collect(saberfile)
# if saber not in result:
# result.append(saber.copy())
# logger.debug(f"Saberes no index path: {result}")
# return result
bbx = Baobaxia()
......@@ -77,7 +77,7 @@ class Saber(BaseModel):
balaio_smid: Optional[str] = None
mucua_smid: Optional[str] = None
content: List[Path] = []
content: List[Dict] = []
application: str = "root"
description: Optional[str] = None
......@@ -264,7 +264,7 @@ class SaberesDataStore:
def get_cached_path(self, smid: str, balaio_smid: Optional[str] = None):
path = Path(".")
if balaio_smid is not None:
path = path / self.get_cached_path(smid=balaio_smid)
return path / self.get_cache(smid, balaio_smid).path
def is_cached(self, smid: str, balaio_smid: Optional[str] = None):
......@@ -393,7 +393,7 @@ class SaberesDataset:
def collect(self, path: Path):
"""Coleta um saber."""
result = self.read_file(path)
self.datastore.cache(result)
return result.copy()
......
......@@ -50,7 +50,9 @@ class Sankofa:
saber_local_path / config.saber_file_ext
)
for content_path in saber.content:
saberes_paths.append(
saber_local_path / content_path["filename"]
)
sankofa_data[balaio_path] = saberes_paths
return sankofa_data
......@@ -215,7 +217,7 @@ class Sankofa:
with SankofaInfo(
balaio=dataset, name=mocambola.username, email=mocambola.email
):
return dataset.save(path=saberes_paths)
@classmethod
def remove(
......
#!/usr/bin/env python3
import json
from hashlib import md5
from pathlib import Path
from shutil import copy
from loguru import logger
from passlib.context import CryptContext
from pydantic import BaseModel
from unidecode import unidecode
......@@ -10,12 +17,86 @@ class GeoLocation(BaseModel):
description: str = ""
def create_link(src: Path, target: Path):
# follow_symlinks=False recreates a symlink at target instead of copying the
# file it points to, so the content stays shared (git-annex style).
if not target.exists():
copy(src, target, follow_symlinks=False)


def copy_file(src: Path, target: Path):
# follow_symlinks=True materializes the pointed-to file content at target.
if not target.exists():
copy(src, target, follow_symlinks=True)
def calculate_md5(file_path: str) -> str:
md5_hash = md5()
with open(file_path, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
md5_hash.update(chunk)
return md5_hash.hexdigest()
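# Example (hypothetical file): calculate_md5("/tmp/foto.jpg") yields the hex digest
# embedded in the MD5E keys built by the endpoints: f"MD5E-s{size}--{digest}{suffix}".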
def str_to_hash(base: str):
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
hash_pass = pwd_context.hash(base)
# return hashlib.md5(base.encode()).hexdigest()
return hash_pass
def norm(data):
if isinstance(data, list):
normlist = []
for value in data:
normlist.append(unidecode(value.casefold()))
return normlist
elif isinstance(data, str):
return unidecode(data.casefold())
else:
return data
def rm_duplicate_dict_in_list(dlist: list):
visto = set()
nova_lista = []
for d in dlist:
t = tuple(sorted(d.items()))
if t not in visto:
visto.add(t)
nova_lista.append(d)
return nova_lista
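# Example: two appends of {"filename": "a.jpg", "hash_sum": "X"} collapse to one
# entry; the midia endpoints call this after every content.append().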
def processar_arquivo_json(
caminho, atributo, novo_valor, excluidos=None, incluidos=None
):
caminho = Path(caminho)
if excluidos is None:
excluidos = []
if incluidos is None:
incluidos = []
logger.debug(f"PROCESSAR_ARQUIVO_JSON, CAMINHO: {caminho}")
if caminho.is_file() and caminho.name == ".baobaxia":
if all(excluido not in str(caminho) for excluido in excluidos) and all(
incluido in str(caminho) for incluido in incluidos
):
with open(caminho, "r", encoding="utf-8") as arquivo:
try:
dados = json.load(arquivo)
logger.debug(f"PROCESSAR_ARQUIVO_JSON, DADOS: {dados}")
except json.JSONDecodeError:
print(f"Erro ao ler o arquivo Baobaxia: {caminho}")
return
for chave, valor in dados.items():
if chave == atributo:
dados[chave] = novo_valor
# Rewrite the file with the new values
with open(caminho, "w", encoding="utf-8") as arquivo:
json.dump(dados, arquivo, indent=4)
elif caminho.is_dir():
# If it is a directory, walk the files and folders inside it
for item in caminho.iterdir():
if all(excluido not in str(item) for excluido in excluidos) and all(
incluido in str(item) for incluido in incluidos
):
processar_arquivo_json(
item, atributo, novo_valor, excluidos, incluidos
)